This script uses the quantile normalized data as well as the clinical and demographic data to build linear regression models between Active GCA and Inactive GCA patients. Marginal random effect linear regression models were run on six clinical and demographic variables (age, smoking status, sex, prednisone use, aspirin use, and methotrexate use)between Active GCA and Inactive GCA. Variables with random effect linear regression model P-values < 0.05 were identified as significant confounders and included in the full random effect linear regression models. In the full random effect linear regression models, the P-value from the study group variable was used to identify differentially abundant proteins between Active GCA and Inactive GCA. A threshold of P < 0.01 was applied to all plasma proteins for significance.

In [None]:
library("lme4")
library("dplyr")
library("lmerTest")

#read in the proteomics data and the patient demographics data
all_quantile_df = read.csv("../../data/all_quantile_data.csv")
vasculitis_df = filter( all_quantile_df,all_quantile_df$Study_group != "Healthy_Control")
dim(vasculitis_df)
#View(vasculitis_df)
vasculitis_demographics_df <- read.csv("../../data/vasculitis_patient_info.csv")
vasculitis_df$maskID == vasculitis_demographics_df$maskid
vasculitis_df = vasculitis_df[order(vasculitis_df$maskID),]
vasculitis_demographics_df = vasculitis_demographics_df[order(vasculitis_demographics_df$maskid),]
vasculitis_df$maskID == vasculitis_demographics_df$maskid


#get the number of columns and rows 
num_columns <- ncol(vasculitis_df)
num_proteins <- num_columns - 3
num_samples <- nrow(vasculitis_df)

#seperate just the proteins to use later
proteins_only <- data.frame(vasculitis_df[ , 4:num_columns])

#seperate the protein names for the linear model
protein_names <- data.frame(colnames(proteins_only))
colnames(protein_names) <- "Protein"
protein_names[ , 1] <- colnames(proteins_only)

#make a dataframe to hold the values
grid_search_pvalues <- data.frame(matrix(data = 0, nrow = num_proteins, ncol = 8))
colnames(grid_search_pvalues) <- cbind("Protein","Study_group","Age","Sex","Smoking", "Prednisone", "Methotrexate", "Aspirin")
grid_search_pvalues[, 1] <- protein_names

grid_search_binary <- data.frame(matrix(data = 0, nrow = num_proteins, ncol = 8))
colnames(grid_search_binary) <- cbind("Protein","Study_group","Age","Sex","Smoking", "Prednisone", "Methotrexate", "Aspirin")
grid_search_binary[, 1] <- protein_names



In [15]:
#order both the demographics data and the protein data in the same order using the maskid
active_demographics = filter(vasculitis_demographics_df,vasculitis_demographics_df$Study_group == "Active")
active_demographics = active_demographics[order(active_demographics$maskid),]
#head(active_demographics)

active_df = filter(all_quantile_df,all_quantile_df$Study_group == "Active")
active_df = active_df[order(active_df$maskID),]
#head(active_df)
print(active_demographics$maskid == active_df$maskID)


inactive_demographics = filter(vasculitis_demographics_df,vasculitis_demographics_df$Study_group == "Inactive")
inactive_demographics = inactive_demographics[order(inactive_demographics$maskid),]
#head(inactive_demographics)

inactive_df = filter(all_quantile_df,all_quantile_df$Study_group == "Inactive")
inactive_df = inactive_df[order(inactive_df$maskID),]
print(inactive_demographics$maskid == inactive_df$maskID)

 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE


In [27]:
#combine all active proteins and demographics
active_proteins_and_demographics = data.frame(active_df[,4:num_columns])
active_proteins_and_demographics$Study_group = active_df$Study_group
active_proteins_and_demographics$maskID = active_df$maskID
active_proteins_and_demographics$Age = active_demographics$Age
active_proteins_and_demographics$Smoking = active_demographics$Smoking
active_proteins_and_demographics$Prednisone = active_demographics$PrednisoneCurrentlyReceiving
active_proteins_and_demographics$Methotrexate = active_demographics$MethotrexateWasTaken
active_proteins_and_demographics$Aspirin = active_demographics$Aspirin
active_proteins_and_demographics$Sex = active_demographics$Sex
dim(active_proteins_and_demographics)
head(active_proteins_and_demographics)

Unnamed: 0_level_0,CRBB2_10000.28,c.Raf_10001.7,ZNF41_10003.15,ELK1_10006.25,GUC1A_10008.43,BECN1_10010.10,OCRL_10011.65,SPDEF_10012.5,SLUG_10014.31,KCAB2_10015.119,⋯,UBXN4.CD.1_9997.12,IRF6_9999.1,Study_group,maskID,Age,Smoking,Prednisone,Methotrexate,Aspirin,Sex
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
1,736.6811,263.2578,164.6267,1016.69,488.21,317.2122,3847.661,2069.718,893.89,512.1578,⋯,8510.258,1209.892,Active,GCA-1,69.0,No,No,No,No,Male
10,604.3556,406.9444,142.8822,617.9922,600.3578,388.8356,9311.811,1856.36,1081.9444,734.8233,⋯,7314.908,1248.33,Active,GCA-10,71.7,No,No,No,Yes,Female
11,1347.5344,294.2639,149.5111,4369.2867,460.6233,304.0489,3287.906,1425.487,825.12,538.1511,⋯,16116.304,1959.607,Active,GCA-11,79.6,No,No,No,No,Male
12,549.8122,254.1578,157.4,594.6433,458.1667,325.8228,4158.62,2361.511,1072.83,507.4411,⋯,5191.104,1031.876,Active,GCA-12,63.1,Yes,Yes,No,Yes,Female
13,506.3311,242.9217,145.4156,645.4678,404.3,347.9956,2930.553,1587.739,888.0244,505.1778,⋯,12623.967,1304.062,Active,GCA-13,65.3,Yes,No,Past Week,Yes,Female
14,691.6233,281.0356,152.2789,563.7733,474.7361,321.8661,2860.513,1582.086,1006.3389,690.3122,⋯,7172.306,1289.327,Active,GCA-14,72.4,No,Yes,No,Yes,Female


In [28]:
#combine all the inactive proteins and demographics
inactive_proteins_and_demographics = data.frame(inactive_df[,4:num_columns])
inactive_proteins_and_demographics$Study_group = inactive_df$Study_group
inactive_proteins_and_demographics$maskID = inactive_df$maskID
inactive_proteins_and_demographics$Age = inactive_demographics$Age
inactive_proteins_and_demographics$Smoking = inactive_demographics$Smoking
inactive_proteins_and_demographics$Prednisone = inactive_demographics$PrednisoneCurrentlyReceiving
inactive_proteins_and_demographics$Methotrexate = inactive_demographics$MethotrexateWasTaken
inactive_proteins_and_demographics$Aspirin = inactive_demographics$Aspirin
inactive_proteins_and_demographics$Sex = inactive_demographics$Sex
dim(inactive_proteins_and_demographics)
head(inactive_proteins_and_demographics)

Unnamed: 0_level_0,CRBB2_10000.28,c.Raf_10001.7,ZNF41_10003.15,ELK1_10006.25,GUC1A_10008.43,BECN1_10010.10,OCRL_10011.65,SPDEF_10012.5,SLUG_10014.31,KCAB2_10015.119,⋯,UBXN4.CD.1_9997.12,IRF6_9999.1,Study_group,maskID,Age,Smoking,Prednisone,Methotrexate,Aspirin,Sex
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
1,689.0178,274.6056,173.1122,1164.7911,522.5844,322.5689,4303.878,1867.701,887.3033,516.4644,⋯,8440.326,1184.1156,Inactive,GCA-1,69.3,No,Yes,No,No,Male
10,509.9244,435.6033,174.2522,657.91,508.9383,318.3378,4746.134,1495.139,967.99,480.59,⋯,26205.253,3649.0967,Inactive,GCA-10,72.2,No,No,No,Yes,Female
11,1503.5411,401.6656,141.6878,29009.8722,390.9911,272.8089,2275.52,1223.574,736.3722,685.2467,⋯,26205.253,3216.15,Inactive,GCA-11,80.0,No,Yes,No,No,Male
12,567.8378,260.8228,136.4156,596.1078,584.7544,328.1489,3847.661,2062.059,1145.1711,480.78,⋯,4930.646,964.1144,Inactive,GCA-12,63.6,Yes,Yes,No,Yes,Female
13,551.0106,244.7106,159.075,549.3733,381.5361,350.1,2553.006,1686.678,921.2056,559.3556,⋯,11896.302,1195.5356,Inactive,GCA-13,65.6,Yes,No,No,Yes,Female
14,818.7789,251.7978,159.7256,569.9722,484.7578,342.8111,3274.356,1718.964,1066.5344,550.25,⋯,6016.766,833.8333,Inactive,GCA-14,72.8,No,No,No,No,Female


In [33]:
#combine everything to run random effect models on
proteins_and_demographics = rbind(active_proteins_and_demographics,inactive_proteins_and_demographics)
dim(proteins_and_demographics)
proteins_and_demographics

Unnamed: 0_level_0,CRBB2_10000.28,c.Raf_10001.7,ZNF41_10003.15,ELK1_10006.25,GUC1A_10008.43,BECN1_10010.10,OCRL_10011.65,SPDEF_10012.5,SLUG_10014.31,KCAB2_10015.119,⋯,UBXN4.CD.1_9997.12,IRF6_9999.1,Study_group,maskID,Age,Smoking,Prednisone,Methotrexate,Aspirin,Sex
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
1,736.6811,263.2578,164.6267,1016.69,488.21,317.2122,3847.661,2069.7178,893.89,512.1578,⋯,8510.258,1209.8922,Active,GCA-1,69.0,No,No,No,No,Male
10,604.3556,406.9444,142.8822,617.9922,600.3578,388.8356,9311.811,1856.36,1081.9444,734.8233,⋯,7314.908,1248.33,Active,GCA-10,71.7,No,No,No,Yes,Female
11,1347.5344,294.2639,149.5111,4369.2867,460.6233,304.0489,3287.906,1425.4867,825.12,538.1511,⋯,16116.304,1959.6067,Active,GCA-11,79.6,No,No,No,No,Male
12,549.8122,254.1578,157.4,594.6433,458.1667,325.8228,4158.62,2361.5111,1072.83,507.4411,⋯,5191.104,1031.8756,Active,GCA-12,63.1,Yes,Yes,No,Yes,Female
13,506.3311,242.9217,145.4156,645.4678,404.3,347.9956,2930.553,1587.7394,888.0244,505.1778,⋯,12623.967,1304.0622,Active,GCA-13,65.3,Yes,No,Past Week,Yes,Female
14,691.6233,281.0356,152.2789,563.7733,474.7361,321.8661,2860.513,1582.0856,1006.3389,690.3122,⋯,7172.306,1289.3272,Active,GCA-14,72.4,No,Yes,No,Yes,Female
15,494.1167,470.4294,162.6856,727.7,431.0878,280.7622,2119.392,1159.37,737.2367,687.8911,⋯,28742.35,4653.46,Active,GCA-15,58.7,Yes,Yes,No,Yes,Female
16,624.2922,279.9939,166.6378,517.6278,430.66,337.0739,2743.277,1804.4633,949.4356,742.2694,⋯,7049.581,1187.7756,Active,GCA-16,77.9,No,Yes,No,No,Male
17,588.96,277.9178,149.5111,987.3911,722.1078,358.865,4870.271,1152.5689,911.4456,561.0828,⋯,12531.108,3868.2422,Active,GCA-17,79.4,No,Yes,No,No,Female
18,540.9278,671.7306,158.0211,821.8389,435.0906,313.1772,2205.246,1184.58,777.0256,670.2089,⋯,33917.759,8685.77,Active,GCA-18,74.3,Yes,Yes,No,Yes,Female


In [34]:
head(grid_search_binary)

Unnamed: 0_level_0,Protein,Study_group,Age,Sex,Smoking,Prednisone,Methotrexate,Aspirin
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,CRBB2_10000.28,0,0,0,0,0,0,0
2,c.Raf_10001.7,0,0,0,0,0,0,0
3,ZNF41_10003.15,0,0,0,0,0,0,0
4,ELK1_10006.25,0,0,0,0,0,0,0
5,GUC1A_10008.43,0,0,0,0,0,0,0
6,BECN1_10010.10,0,0,0,0,0,0,0


In [35]:
for(i in 1:num_proteins){
    protein_name <- protein_names[i,1]
    study_group_model <- lmer(paste0(protein_name, " ~ Study_group + (1|maskID)"), data = proteins_and_demographics)
    pvalue1 <- summary(study_group_model)$coefficients[2,5]
    grid_search_pvalues[i,2] <- pvalue1
    
    age_model <- lmer(paste0(protein_name, " ~ Age + (1|maskID)"), data = proteins_and_demographics)
    pvalue2 <- summary(age_model)$coefficients[2,5]
    grid_search_pvalues[i,3] <- pvalue2

    sex_model <- lmer(paste0(protein_name, " ~ Sex + (1|maskID)"), data = proteins_and_demographics)
    pvalue3 <- summary(sex_model)$coefficients[2,5]
    grid_search_pvalues[i,4] <- pvalue3
    
    smoking_model <- lmer(paste0(protein_name, " ~ Smoking + (1|maskID)"), data = proteins_and_demographics)
    pvalue4<- summary(smoking_model)$coefficients[2,5]
    grid_search_pvalues[i,5] <- pvalue4
    
    prednisone_model <- lmer(paste0(protein_name, " ~ Prednisone + (1|maskID)"), data = proteins_and_demographics)
    pvalue5<- summary(prednisone_model)$coefficients[2,5]
    grid_search_pvalues[i,6] <- pvalue5   
    
    methotrexate_model <- lmer(paste0(protein_name, " ~ Methotrexate + (1|maskID)"), data = proteins_and_demographics)
    pvalue6<- summary(methotrexate_model)$coefficients[2,5]
    grid_search_pvalues[i,7] <- pvalue6   
    
    aspirin_model <- lmer(paste0(protein_name, " ~ Aspirin + (1|maskID)"), data = proteins_and_demographics)
    pvalue7<- summary(aspirin_model)$coefficients[2,5]
    grid_search_pvalues[i,8] <- pvalue7   
    
}
print("done with models.")

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done with models."


In [36]:
head(grid_search_pvalues)

Unnamed: 0_level_0,Protein,Study_group,Age,Sex,Smoking,Prednisone,Methotrexate,Aspirin
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,CRBB2_10000.28,0.09473738,0.10041503,0.27593591,0.1824972,0.6454117,0.802496587,4.975114e-05
2,c.Raf_10001.7,0.55398949,0.96135967,0.97514011,0.2495492,0.43645549,0.346417197,0.240612
3,ZNF41_10003.15,0.24497714,0.51149656,0.32102858,0.58592,0.29973549,0.002977795,0.7878182
4,ELK1_10006.25,0.31436509,0.21359762,0.04849937,0.3306173,0.36929339,0.631863448,0.3292615
5,GUC1A_10008.43,0.34122022,0.41132586,0.4774049,0.4204667,0.71752614,0.858669361,0.5555128
6,BECN1_10010.10,0.30294704,0.04326037,0.72275646,0.416336,0.04169712,0.980527137,0.3486288


In [49]:
#add protein name and save

#save the results read.csv("../../data/vasculitis_patient_info.csv")
write.csv(grid_search_pvalues, file = "../../analysis/linear_modeling_active_and_inactive/grid_search_pvalue_results.csv", row.names = FALSE)

In [37]:
#now go through and binarize the data, if the pvalue is less than 0.05 then in the binarized matrix the value will be 1 otherwise it will be 0
#grid_search_binary
#grid_search_pvalues
for(i in 2:7){
    for(j in 1:num_proteins){
        current_pval <- grid_search_pvalues[j,i]
        if(current_pval < 0.05){
            grid_search_binary[j,i] <- 1
        }
        else{
            grid_search_binary[j,i] <- 0
        }
    }    
}


In [50]:
#save the output
write.csv(grid_search_binary, file = "../../analysis/linear_modeling_active_and_inactive/grid_search_binary_results.csv", row.names = FALSE)
#filter out the proteins without significance for the study_group
significant_study_group_proteins <- filter(grid_search_binary,grid_search_binary$Study_group == 1)
#identify the proteins that are not significant for any of the other covariates
proteins_with_no_covariates <- filter(significant_study_group_proteins,significant_study_group_proteins$Age == 0 & significant_study_group_proteins$Aspirin == 0 & significant_study_group_proteins$Methotrexate == 0 & significant_study_group_proteins$Prednisone == 0 & significant_study_group_proteins$Smoking == 0 )


In [39]:
head(grid_search_binary)
dim(grid_search_binary)
grid_search_binary$Study_group = 1

Unnamed: 0_level_0,Protein,Study_group,Age,Sex,Smoking,Prednisone,Methotrexate,Aspirin
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,CRBB2_10000.28,0,0,0,0,0,0,0
2,c.Raf_10001.7,0,0,0,0,0,0,0
3,ZNF41_10003.15,0,0,0,0,0,1,0
4,ELK1_10006.25,0,0,1,0,0,0,0
5,GUC1A_10008.43,0,0,0,0,0,0,0
6,BECN1_10010.10,0,1,0,0,1,0,0


In [40]:
#Protein	Study_group	Age	Smoking	Prednisone	Methotrexate	Aspirin
#function_name <- function(arg_1, arg_2, ...) {
#   Function body 
#}
make_linear_model <- function(binary_results){
    #binary_results is the row from the binary data, 7 columns with 6 values
    model_string = " "
    if(binary_results[1,2] == 1){
        model_string = paste0(model_string,"~ Study_group + (1|maskID)")
    }
    if(binary_results[1,3] == 1){
        model_string = paste0(model_string," + Age")
    }
    if(binary_results[1,4] == 1){
        model_string = paste0(model_string," + Sex")
    }
    if(binary_results[1,5] == 1){
        model_string = paste0(model_string," + Smoking")
    }
    if(binary_results[1,6] == 1){
        model_string = paste0(model_string," + Prednisone")
    }    
    if(binary_results[1,7] == 1){
        model_string = paste0(model_string," + Methotrexate")
    }    
    if(binary_results[1,8] == 1){
        model_string = paste0(model_string," + Aspirin")
    }    
    return(model_string)
}

In [41]:
#make a data frame to hold the results, nominal p value for study group and also save the model as a string
linear_models_with_covariates_results <- data.frame(matrix(data = 0, nrow = num_proteins, ncol = 3))
colnames(linear_models_with_covariates_results) <- cbind("Protein", "Nominal_pvalue", "formula")
linear_models_with_covariates_results[ , 1] <- protein_names
for(i in 1:num_proteins){
    current_protein_binary <- grid_search_binary[i,]
    study_group_binary <- current_protein_binary[1,2]
    if(study_group_binary == 1){
    model_protein <- grid_search_binary[i,1]
    model_string <- " "
    model_string <- make_linear_model(current_protein_binary)
    linear_model1 <- lmer(paste0(model_protein, model_string), data = proteins_and_demographics)
    pvalue <- summary(linear_model1)$coefficients[2,5]
    linear_models_with_covariates_results[i,2] <- pvalue
    linear_models_with_covariates_results[i,3] <- model_string
    }
    if(study_group_binary == 0){
        linear_models_with_covariates_results[i,2] <- 1
        linear_models_with_covariates_results[i,3] <- "not significant for study_group"
    }
    if(i %% 1000 == 0){
        print("model ")
        print(i)
    }
}
print("done with linear models")

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "model "
[1] 1000


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "model "
[1] 2000


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "model "
[1] 3000


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "model "
[1] 4000


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "model "
[1] 5000


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "model "
[1] 6000


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "model "
[1] 7000


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')



[1] "done with linear models"


In [42]:
#find the fold changes for active and inactive using the mean
active_df <- filter(vasculitis_df,vasculitis_df$Study_group == "Active")
dim(active_df)
active_proteins_only <- active_df[,4:num_columns]
dim(active_proteins_only)

inactive_df <- filter(vasculitis_df,vasculitis_df$Study_group == "Inactive")
dim(inactive_df)
inactive_proteins_only <- inactive_df[,4:num_columns]
dim(inactive_proteins_only)

In [43]:
#find the means
vasculitis_fold_change <- data.frame(matrix(data = 0,nrow=num_proteins,ncol=4))
colnames(vasculitis_fold_change) <- cbind("Protein","Active_mean","Inactive_mean","Fold_change")
vasculitis_fold_change[,1] <- protein_names
for(i in 1:num_proteins){
    active_mean <- mean(active_proteins_only[,i])
  inactive_mean <- mean(inactive_proteins_only[,i])
    
   vasculitis_fold_change[i,2] <- active_mean
   vasculitis_fold_change[i,3] <- inactive_mean
}
vasculitis_fold_change[,4] <- log2(vasculitis_fold_change[,2]/vasculitis_fold_change[,3])
#add to results
linear_models_with_covariates_results$fold_change <- vasculitis_fold_change$Fold_change

#add adjusted p values
linear_models_with_covariates_results$adjusted_pvalue <- p.adjust(linear_models_with_covariates_results$Nominal_pvalue, method="hochberg")


In [44]:
protein_key <- read.csv("../../data/key_for_protein_names.csv")

In [48]:
#add target name
linear_models_with_covariates_results$Target <- protein_key[,2]
linear_models_with_covariates_results$Entrez <- protein_key[,3]
#save everything
write.csv(linear_models_with_covariates_results,"../../analysis/linear_modeling_active_and_inactive/linear_models_with_covariates_results.csv", row.names=FALSE)
significant_linear_models_with_covariates_results <- filter(linear_models_with_covariates_results,linear_models_with_covariates_results$Nominal_pvalue < 0.01)
significant_linear_models_with_covariates_results <- significant_linear_models_with_covariates_results[order(significant_linear_models_with_covariates_results$Nominal_pvalue),]

increasing_significant_linear_models_with_covariates_results <- filter(significant_linear_models_with_covariates_results,significant_linear_models_with_covariates_results$fold_change > 0)
dim(increasing_significant_linear_models_with_covariates_results)
write.csv(increasing_significant_linear_models_with_covariates_results,"../../analysis/linear_modeling_active_and_inactive/increasing_significant_linear_models_with_covariates_results.csv",row.names=FALSE)

decreasing_significant_linear_models_with_covariates_results <- filter(significant_linear_models_with_covariates_results,significant_linear_models_with_covariates_results$fold_change < 0)
dim(decreasing_significant_linear_models_with_covariates_results)
write.csv(decreasing_significant_linear_models_with_covariates_results,"../../analysis/linear_modeling_active_and_inactive/decreasing_significant_linear_models_with_covariates_results.csv",row.names=FALSE)

In [46]:
linear_models_with_covariates_results

Protein,Nominal_pvalue,formula,fold_change,adjusted_pvalue,Target,Entrez
<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>
CRBB2_10000.28,0.09473738,~ Study_group + (1|maskID),-0.0698450919,0.9999376,CRBB2,1415
c.Raf_10001.7,0.55398949,~ Study_group + (1|maskID),0.0363571250,0.9999376,c-Raf,5894
ZNF41_10003.15,0.39184987,~ Study_group + (1|maskID) + Methotrexate,-0.1237242251,0.9999376,ZNF41,7592
ELK1_10006.25,0.31436509,~ Study_group + (1|maskID) + Sex,-0.9880633995,0.9999376,ELK1,2002
GUC1A_10008.43,0.34122022,~ Study_group + (1|maskID),0.1415765439,0.9999376,GUC1A,2978
BECN1_10010.10,0.36480887,~ Study_group + (1|maskID) + Age + Prednisone,-0.0999057903,0.9999376,BECN1,8678
OCRL_10011.65,0.99601705,~ Study_group + (1|maskID),-0.0004990391,0.9999376,OCRL,4952
SPDEF_10012.5,0.88402086,~ Study_group + (1|maskID),-0.0093815001,0.9999376,SPDEF,25803
SLUG_10014.31,0.05672059,~ Study_group + (1|maskID) + Age,-0.0924165951,0.9999376,SLUG,6591
KCAB2_10015.119,0.53585320,~ Study_group + (1|maskID) + Age,-0.1469599857,0.9999376,KCAB2,8514
