# This model combines STRING, BrainSpan, and TADA statistics to generate gene-risk prediction scores


In [1]:
library(randomForest)

randomForest 4.7-1

Type rfNews() to see new features/changes/bug fixes.



In [2]:
# Load pre-processed data files and the layer-1 model scores.
# Together these .Rdata files are expected to supply the objects read below:
# pos_genes, neg_genes, entrezToSymbol, enzToEns, bs_preds and string_preds.
load("rData/training_labels.Rdata")
load("rData/id_conversion.Rdata")
load("models/Brainspan_score.Rdata")
load("models/String_score.Rdata")

# Integrate the STRING and BrainSpan layer-1 scores.
# NOTE(review): only these two score sets become features in this cell; no
# TADA columns are added here.
# Keep the genes (row names) that were scored by both layer-1 models.
common_rows <- rownames(string_preds)[rownames(string_preds) %in% rownames(bs_preds)]
# Gene symbols for the shared genes via the ID-conversion tables.
# Not used further in this cell; kept because later cells may read it.
symbol <- entrezToSymbol[names(enzToEns)[match(common_rows, enzToEns)]]

# Feature table: the "TRUE"-class score from each layer-1 model, one row per
# shared gene.
x_preds <- data.frame(
  string = string_preds[common_rows, "TRUE"],
  brainspan = bs_preds[common_rows, "TRUE"]
)

# Impute any missing scores (na.roughfix uses column medians for numeric data).
x_preds <- na.roughfix(x_preds)

# Training features: every gene that carries a positive or negative label.
x_train <- x_preds[rownames(x_preds) %in% c(pos_genes, neg_genes), ]
# Training labels: TRUE for positive genes, FALSE for negative genes.
y_train <- as.factor(rownames(x_train) %in% pos_genes)

# A two-element per-class sample size only makes sense with both classes.
stopifnot(nlevels(y_train) == 2L)

# Balanced per-tree sample: draw as many genes from each class as the smaller
# class contains. With the labels behind the trace recorded in this notebook
# this appears to equal the previously hard-coded 76 (class-2 error rates are
# multiples of 1/76), so the fitted model is unchanged for that data.
n_per_class <- min(table(y_train))

# Train the random forest; the seed is fixed for reproducibility.
set.seed(5393)

integrated_model <-
  randomForest(
    y = y_train,
    x = x_train[, c("string", "brainspan")],
    importance = TRUE,  # spelled out: `T` is an ordinary, reassignable variable
    strata = y_train,
    sampsize = rep(n_per_class, 2L),
    do.trace = 50,      # print running OOB error every 50 trees
    ntree = 1000)

# Predict class probabilities for every gene that was not used for training.
integrated_preds <- predict(
  integrated_model,
  x_preds[!rownames(x_preds) %in% rownames(integrated_model$votes),
          c("string", "brainspan")],
  type = "prob")
# Training genes get their out-of-bag votes rather than in-sample predictions.
integrated_preds <- rbind(integrated_preds, integrated_model$votes)

# Save the integrated score model and the combined per-gene scores.
save(integrated_model, integrated_preds, file = "models/integrated_score.Rdata")
print("Model saved")

ntree      OOB      1      2
   50:  12.45% 11.80% 21.05%
  100:  12.83% 12.30% 19.74%
  150:  12.73% 12.20% 19.74%
  200:  12.36% 11.80% 19.74%
  250:  12.45% 11.80% 21.05%
  300:  12.17% 11.40% 22.37%
  350:  11.99% 11.30% 21.05%
  400:  12.17% 11.50% 21.05%
  450:  12.27% 11.60% 21.05%
  500:  12.27% 11.60% 21.05%
  550:  12.36% 11.70% 21.05%
  600:  12.27% 11.60% 21.05%
  650:  12.17% 11.50% 21.05%
  700:  12.08% 11.40% 21.05%
  750:  12.27% 11.50% 22.37%
  800:  12.36% 11.60% 22.37%
  850:  12.27% 11.60% 21.05%
  900:  12.27% 11.60% 21.05%
  950:  12.17% 11.50% 21.05%
 1000:  12.17% 11.50% 21.05%
[1] "Model saved"
