In [1]:
library("lme4")
library("dplyr")
library("lmerTest")
# Copy the selected feature columns from `data_df` into `results_df`.
#
# Arguments:
#   num_sigfigs     Number of features to copy (rows of `sigfeature_list` used).
#   sigfeature_list Data frame whose first column holds the feature (column)
#                   names, in the order they should appear in the result.
#   data_df         Data frame that contains the feature columns.
#   results_df      Pre-allocated data frame with the same number of rows as
#                   `data_df`; its i-th column is overwritten with the i-th
#                   listed feature. Column names of `results_df` are kept.
#
# Returns `results_df` with its first `num_sigfigs` columns filled in. When
# `num_sigfigs` is 0 the input `results_df` is returned unchanged.
get_sigfeatures <- function(num_sigfigs, sigfeature_list, data_df, results_df) {
  # seq_len() yields an empty sequence for 0 features; `1:0` would instead
  # iterate over c(1, 0) and index a row that does not exist.
  for (i in seq_len(num_sigfigs)) {
    # as.character() guards against factor names, which would otherwise be
    # used as integer codes when indexing the columns of `data_df`.
    feature_name <- as.character(sigfeature_list[i, 1])
    results_df[, i] <- data_df[feature_name]
  }
  results_df
}

Loading required package: Matrix


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘lmerTest’


The following object is masked from ‘package:lme4’:

    lmer


The following object is masked from ‘package:stats’:

    step




In [2]:
# Select differentially abundant proteins on one fold and export the reduced
# train/test tables.
#
# Every column of `train1` from the fifth onwards is treated as a protein.
# For each protein a mixed model `<protein> ~ Study_group + (1|maskID)` is
# fitted on `train1` with lmer(), and the p-value of the Study_group
# coefficient is recorded. Proteins with a p-value below `pvalue_cutoff` are
# kept and ordered by decreasing -log10(p) * log2(mean Active / mean Inactive).
# The kept protein columns, preceded by Study_group, maskID, sample_ID and a
# Classifier column (1 = "Active", 0 = "Inactive"), are written for the
# training and the test samples (Active rows first, then Inactive rows).
#
# Arguments:
#   train1        Training data frame; needs the columns Study_group, maskID
#                 and sample_ID, and protein columns from column 5 onwards.
#   test1         Test data frame with the same columns as `train1`.
#   fold          Fold identifier, used in the output file names
#                 (`train<fold>.csv`, `test<fold>.csv`).
#   pvalue_cutoff Nominal p-value threshold for keeping a protein.
#   train_dir     Directory that receives the training CSV.
#   test_dir      Directory that receives the test CSV.
#
# Returns (invisibly) the per-protein statistics table. lmer() messages such
# as "boundary (singular) fit" are not suppressed.
get_differentially_abundant_proteins <- function(
    train1, test1, fold,
    pvalue_cutoff = 0.1,
    train_dir = "../../analysis/classifying_active_vs_inactive_v2/differentially_abundant_proteins/train_0_10",
    test_dir = "../../analysis/classifying_active_vs_inactive_v2/differentially_abundant_proteins/test_0_10") {
    num_columns <- ncol(train1)
    if (num_columns < 5) {
        stop("`train1` has no protein columns (expected from column 5 onwards)",
             call. = FALSE)
    }
    protein_columns <- 5:num_columns
    # Taking the names straight from `train1` keeps them intact even when
    # there is only a single protein column.
    protein_names <- colnames(train1)[protein_columns]
    num_proteins <- length(protein_names)

    # Rows of one study group; which() drops rows whose Study_group is NA,
    # matching what dplyr::filter() does.
    rows_of_group <- function(df, group) {
        df[which(df$Study_group == group), , drop = FALSE]
    }
    active_train_df <- rows_of_group(train1, "Active")
    inactive_train_df <- rows_of_group(train1, "Inactive")
    active_test_df <- rows_of_group(test1, "Active")
    inactive_test_df <- rows_of_group(test1, "Inactive")

    # Nominal p-value of the Study_group effect for every protein.
    pvalues <- numeric(num_proteins)
    for (i in seq_len(num_proteins)) {
        linear_model <- lmer(paste0(protein_names[i], "~ Study_group + (1|maskID)"), data = train1)
        # Row 2 is the Study_group coefficient (row 1 is the intercept);
        # column 5 of the lmerTest coefficient table holds the p-value.
        pvalues[i] <- summary(linear_model)$coefficients[2, 5]
    }

    # Per-group means and the derived ranking statistics.
    active_means <- vapply(active_train_df[protein_columns], mean, numeric(1),
                           USE.NAMES = FALSE)
    inactive_means <- vapply(inactive_train_df[protein_columns], mean, numeric(1),
                             USE.NAMES = FALSE)
    fold_change <- log2(active_means / inactive_means)
    neg_log_pvalue <- -log10(pvalues)

    linear_modeling_results <- data.frame(
        Protein = protein_names,
        Nominal_pvalue = pvalues,
        Active_mean = active_means,
        Inactive_mean = inactive_means,
        fold_change = fold_change,
        `the_log(pvalue)` = neg_log_pvalue,
        fc_and_pvalue = neg_log_pvalue * fold_change,
        check.names = FALSE,
        stringsAsFactors = FALSE
    )

    # Keep the nominally significant proteins, strongest combined score first.
    significant_proteins <- linear_modeling_results[
        which(linear_modeling_results$Nominal_pvalue < pvalue_cutoff), , drop = FALSE]
    significant_proteins <- significant_proteins[
        order(-significant_proteins$fc_and_pvalue), , drop = FALSE]
    significant_protein_names <- significant_proteins$Protein

    # Sample details plus the selected protein columns for one group. Sized
    # from the data, so any number of samples per group (including none) and
    # any number of selected proteins (including none) is handled.
    build_group_table <- function(group_df, classifier) {
        details <- data.frame(
            Study_group = group_df$Study_group,
            maskID = group_df$maskID,
            sample_ID = group_df$sample_ID,
            Classifier = rep(classifier, nrow(group_df)),
            stringsAsFactors = FALSE
        )
        cbind(details, group_df[, significant_protein_names, drop = FALSE])
    }

    # Training data: Active rows followed by Inactive rows.
    all_train_df <- rbind(build_group_table(active_train_df, 1),
                          build_group_table(inactive_train_df, 0))
    train_file <- file.path(train_dir, paste0("train", fold, ".csv"))
    write.csv(all_train_df, train_file, row.names = FALSE)

    # Test data, restricted to the proteins selected on the training data.
    all_test_df <- rbind(build_group_table(active_test_df, 1),
                         build_group_table(inactive_test_df, 0))
    test_file <- file.path(test_dir, paste0("test", fold, ".csv"))
    write.csv(all_test_df, test_file, row.names = FALSE)
    print("done")

    invisible(linear_modeling_results)
}

In [3]:
# Process all 30 folds: load each fold's train and test tables and pass them
# to get_differentially_abundant_proteins(), which writes the reduced tables.
for (i in 1:30) {
    train_file <- paste0("../../analysis/classifying_active_vs_inactive_v2/all_proteins/train/train", i, ".csv")
    test_file <- paste0("../../analysis/classifying_active_vs_inactive_v2/all_proteins/test/test", i, ".csv")

    train1 <- read.csv(train_file)
    test1 <- read.csv(test_file)

    get_differentially_abundant_proteins(train1, test1, i)
}
print("done")

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

“Model failed to converge with max|grad| = 0.00699823 (tol = 0.002, component 1)”
boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('i

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

“Model failed to converge with max|grad| = 0.00335356 (tol = 0.002, component 1)”
boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('i

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

“Model failed to converge with max|grad| = 0.00309223 (tol = 0.002, component 1)”
boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('i

[1] "done"


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

In [23]:
# Load the fold-1 training and test tables for the step-by-step cells below.
train1 <- read.csv(
    "../../analysis/classifying_active_vs_inactive_v2/all_proteins/train/train1.csv"
)
test1 <- read.csv(
    "../../analysis/classifying_active_vs_inactive_v2/all_proteins/test/test1.csv"
)


In [27]:
# Fit one mixed model per protein on the training data and record the nominal
# p-value of the Study_group effect.

# The first four columns of train1 are treated as sample details; every
# column after them is a protein.
num_proteins <- ncol(train1) - 4
num_columns <- ncol(train1)
# 4 + seq_len() selects nothing when there are no protein columns, and
# drop = FALSE keeps the column name when there is exactly one.
train_proteins <- data.frame(train1[, 4 + seq_len(num_proteins), drop = FALSE])

# One-column table holding the protein (column) names.
protein_names <- data.frame(matrix(data = 0, nrow = num_proteins, ncol = 1))
colnames(protein_names) <- "Protein"
protein_names[, 1] <- colnames(train_proteins)
#head(protein_names)

# Split the train and test samples by study group.
active_train_df <- filter(train1, train1$Study_group == "Active")
active_train_proteins <- data.frame(
    active_train_df[, 4 + seq_len(num_proteins), drop = FALSE])
inactive_train_df <- filter(train1, train1$Study_group == "Inactive")
inactive_train_proteins <- data.frame(
    inactive_train_df[, 4 + seq_len(num_proteins), drop = FALSE])
active_test_df <- filter(test1, test1$Study_group == "Active")
inactive_test_df <- filter(test1, test1$Study_group == "Inactive")

# Table for the linear modeling results: p-value, active mean, inactive mean,
# fold change, -log10(p-value) and fold change times -log10(p-value).
linear_modeling_results <- data.frame(matrix(data = 0, nrow = num_proteins, ncol = 7))
colnames(linear_modeling_results) <- c("Protein", "Nominal_pvalue", "Active_mean", "Inactive_mean", "fold_change", "the_log(pvalue)", "fc_and_pvalue")
linear_modeling_results[, 1] <- protein_names
# seq_len() skips the loop entirely when there are no proteins.
for (i in seq_len(num_proteins)) {
    current_protein <- protein_names[i, 1]
    linear_model <- lmer(paste0(current_protein, "~ Study_group + (1|maskID)"), data = train1)
    # Row 2 is the Study_group coefficient (row 1 is the intercept); column 5
    # of the lmerTest coefficient table holds the p-value.
    pvalue <- summary(linear_model)$coefficients[2, 5]
    linear_modeling_results[i, 2] <- pvalue
}
print("done")


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) 

[1] "done"


In [35]:
# For each protein, fill in the group means, the log2 fold change (Active mean
# over Inactive mean), -log10 of the nominal p-value, and their product.
# seq_len() makes the loop a no-op when there are no proteins.
for (i in seq_len(num_proteins)) {
    active_protein <- active_train_proteins[, i]
    active_mean <- mean(active_protein)
    inactive_protein <- inactive_train_proteins[, i]
    inactive_mean <- mean(inactive_protein)
    linear_pvalue <- linear_modeling_results[i, 2]

    linear_modeling_results[i, 3] <- active_mean
    linear_modeling_results[i, 4] <- inactive_mean
    linear_modeling_results[i, 5] <- log2(active_mean / inactive_mean)
    linear_modeling_results[i, 6] <- -log10(linear_pvalue)
    # Reuse the two values just stored instead of recomputing them.
    linear_modeling_results[i, 7] <- linear_modeling_results[i, 6] * linear_modeling_results[i, 5]
}

head(linear_modeling_results)

Unnamed: 0_level_0,Protein,Nominal_pvalue,Active_mean,Inactive_mean,fold_change,the_log(pvalue),fc_and_pvalue
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,CRBB2_10000.28,0.07662771,607.3205,640.3497,-0.07640182,1.1156142,-0.08523495
2,c.Raf_10001.7,0.53674462,348.2204,338.9374,0.03898192,0.2702323,0.01053417
3,ZNF41_10003.15,0.25446547,164.9148,179.8976,-0.1254552,0.5943711,-0.07456695
4,ELK1_10006.25,0.3175288,854.2003,1723.6976,-1.01286031,0.4982169,-0.5046241
5,GUC1A_10008.43,0.33098412,574.2866,517.848,0.14924231,0.4801928,0.07166509
6,BECN1_10010.10,0.30675362,348.9009,374.5179,-0.1022174,0.5132103,-0.05245903


In [75]:
# Keep the proteins with a nominal p-value below 0.01, ordered by decreasing
# fold change times -log10(p-value), and export them for train and test.
significant_proteins <- filter(linear_modeling_results, linear_modeling_results$Nominal_pvalue < 0.01)
ordered_significant_proteins <- significant_proteins[order(-significant_proteins$fc_and_pvalue), ]
significant_protein_names <- data.frame(ordered_significant_proteins[, 1])
colnames(significant_protein_names) <- "Protein"
num_sig_proteins <- nrow(significant_protein_names)

# Gather these proteins from the Active training samples. All tables below
# are sized from the actual number of rows in each group rather than from a
# fixed sample count.
active_train_sig_proteins <- data.frame(matrix(data = 0, nrow = nrow(active_train_df), ncol = num_sig_proteins))
colnames(active_train_sig_proteins) <- significant_protein_names[, 1]
active_train_sig_proteins <- get_sigfeatures(num_sig_proteins, significant_protein_names, active_train_df, active_train_sig_proteins)
#head(active_train_sig_proteins)
# Sample details; Classifier is 1 for Active and 0 for Inactive samples.
active_train_details <- data.frame(
    Study_group = active_train_df$Study_group,
    maskID = active_train_df$maskID,
    sample_ID = active_train_df$sample_ID,
    Classifier = rep(1, nrow(active_train_df)),
    stringsAsFactors = FALSE
)
#head(active_train_details)
all_active_train <- cbind(active_train_details, active_train_sig_proteins)
#dim(all_active_train)
#head(all_active_train)

# Same for the Inactive training samples.
inactive_train_sig_proteins <- data.frame(matrix(data = 0, nrow = nrow(inactive_train_df), ncol = num_sig_proteins))
colnames(inactive_train_sig_proteins) <- significant_protein_names[, 1]
inactive_train_sig_proteins <- get_sigfeatures(num_sig_proteins, significant_protein_names, inactive_train_df, inactive_train_sig_proteins)
#head(inactive_train_sig_proteins)

inactive_train_details <- data.frame(
    Study_group = inactive_train_df$Study_group,
    maskID = inactive_train_df$maskID,
    sample_ID = inactive_train_df$sample_ID,
    Classifier = rep(0, nrow(inactive_train_df)),
    stringsAsFactors = FALSE
)
all_inactive_train <- cbind(inactive_train_details, inactive_train_sig_proteins)

# Combine all training data (Active rows first) and write it out.
all_train_df <- rbind(all_active_train, all_inactive_train)
#dim(all_train_df)
#head(all_train_df)
write.csv(all_train_df, "../../analysis/classifying_active_vs_inactive_v2/differentially_abundant_proteins/train/train1.csv", row.names = FALSE)

# Same for the Active test samples.
active_test_sig_proteins <- data.frame(matrix(data = 0, nrow = nrow(active_test_df), ncol = num_sig_proteins))
colnames(active_test_sig_proteins) <- significant_protein_names[, 1]
active_test_sig_proteins <- get_sigfeatures(num_sig_proteins, significant_protein_names, active_test_df, active_test_sig_proteins)
#head(active_test_sig_proteins)

active_test_details <- data.frame(
    Study_group = active_test_df$Study_group,
    maskID = active_test_df$maskID,
    sample_ID = active_test_df$sample_ID,
    Classifier = rep(1, nrow(active_test_df)),
    stringsAsFactors = FALSE
)

all_active_test <- cbind(active_test_details, active_test_sig_proteins)
#head(all_active_test)

# Same for the Inactive test samples.
inactive_test_sig_proteins <- data.frame(matrix(data = 0, nrow = nrow(inactive_test_df), ncol = num_sig_proteins))
colnames(inactive_test_sig_proteins) <- significant_protein_names[, 1]
inactive_test_sig_proteins <- get_sigfeatures(num_sig_proteins, significant_protein_names, inactive_test_df, inactive_test_sig_proteins)
#head(inactive_test_sig_proteins)

inactive_test_details <- data.frame(
    Study_group = inactive_test_df$Study_group,
    maskID = inactive_test_df$maskID,
    sample_ID = inactive_test_df$sample_ID,
    Classifier = rep(0, nrow(inactive_test_df)),
    stringsAsFactors = FALSE
)

all_inactive_test <- cbind(inactive_test_details, inactive_test_sig_proteins)
#head(all_inactive_test)

# Combine all test data (Active rows first) and write it out.
all_test_df <- rbind(all_active_test, all_inactive_test)
#head(all_test_df)
write.csv(all_test_df, "../../analysis/classifying_active_vs_inactive_v2/differentially_abundant_proteins/test/test1.csv", row.names = FALSE)