*Ewing et al (2020) Structural variants at the BRCA1/2 loci are a common source of homologous repair deficiency in high grade serous ovarian carcinoma*

# Notebook 9c - Multivariable modelling using elastic net on genomic features only 


## Load libraries

In [None]:
library(tximport)
require(DESeq2)
library(caret)
library(caTools)

## Load dataset

In [5]:
load("Robjecttosetup_reducedmultivariablemodel.RData")

## Model

This section performs the following steps 100 times:

- Partition the data
- Fit an elastic net on the training set using 10-fold cross validation to determine parameters
- Extract model coefficients
- Extract model performance metrics

In [11]:
df<-numeric()
perf_list<-numeric()

performance_metrics = function(actual, predicted) {
  acc<-sum(actual == predicted)/length(actual)
  TP<-  sum(actual=='HRD'& predicted=='HRD')
  TN<-  sum(actual=='HRP'& predicted=='HRP')
  FP<-  sum(actual=='HRD'& predicted=='HRP')
  FN<-  sum(actual=='HRP'& predicted=='HRD')  
  sensitivity<-TP/(TP+FN)  
  specificity<-TN/(TN+FP)
  auc<-  colAUC(as.numeric(predict(model, test.data)), test.data$HRDeficient)
  r<-list(Accuracy=acc,Sensitivity=sensitivity,Specificity=specificity,AUC=auc[1])
    return(r)
}

i<-0
while (length(df) <2200 | length(df)==0 ){
  i<-i+1
    print(i)
    print(": Setting seed...")
    set.seed(489+i)

    print("Partitioning data...")
    train = sample(1:nrow(sampleInfo_analysis), nrow(sampleInfo_analysis)*0.8)
    train.data<-sampleInfo_analysis[train,]
    test.data<-sampleInfo_analysis[-train,]
    
    if (dim(table(train.data$BRCA1_LOH))==1){
        next
    }
    
    print("Fit elastic net...")
    model <- train(HRDeficient ~., 
                   data = train.data, 
                   method = "glmnet",
                   trControl = trainControl("cv", 
                                            number = 10,
                                            summaryFunction = twoClassSummary,
                                            classProbs = TRUE),
                   standardize=TRUE,                          
                   tuneLength = 10
    )

    print("Get coefficients...")
    res<-coef(model$finalModel, model$bestTune$lambda)

    df<-cbind(df,as.numeric(res))
    
    perf_list<-cbind(perf_list,performance_metrics(actual = test.data$HRDeficient,
         predicted = predict(model, newdata = test.data)))  
}

save(df,file="reduced_df_1_100_loh.RData")
save(perf_list,file="reduced_df_1_100_perf_loh.RData")

[1] 1
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 2
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 3
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 4
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 5
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 6
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 7
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 8
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 9
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."
[1] 10
[1] ": Setting seed..."
[1] "Partitioning data..."
[1] "Fit elastic net..."


“The metric "Accuracy" was not in the result set. ROC will be used instead.”

[1] "Get coefficients..."


0,1,2,3,4,5,6,7,8,9,10
Accuracy,0.6829268,0.804878,0.7073171,0.804878,0.7317073,0.6341463,0.6585366,0.7317073,0.804878,0.7560976
Sensitivity,0.7222222,0.8636364,0.9230769,1.0,1.0,0.8571429,0.8571429,0.8181818,0.9333333,1.0
Specificity,0.6521739,0.7368421,0.6071429,0.7333333,0.6071429,0.5185185,0.5555556,0.7,0.7307692,0.6
AUC,0.6845238,0.807598,0.7330918,0.7894737,0.7708333,0.6775,0.6911765,0.7065217,0.8083333,0.8076923
