# Epigenetic score
A penalised regression model with the human Dex residuals as the outcome and the 496 cross-tissue CpGs as predictors was run using the glmnet package in R. 

In [5]:
library(glmnet)
library(corrplot)

corrplot 0.84 loaded


In [3]:
# Human DNA methylation data of the MPIP cohort. The Dex outcome has been
# residualised for sex, case/control status, age, BMI and cell counts.
data <- readRDS(file = "data/DNAm_of_overlapping_hpc_and_human.Rds")

In [4]:
# Determine the elastic-net (EN) mixing parameter alpha from how strongly the
# predictors are correlated with one another.

# Predictor table: every column whose name does not match the outcome pattern.
outcome_cols <- grep("residuals.dex.human", colnames(data))
meth <- data[, -outcome_cols]

# Pairwise correlation matrix. The last two columns of `meth` are left out.
# NOTE(review): presumably these two are not CpG columns -- confirm against
# the layout of the input file.
n_meth <- dim(meth)[2]
cor_mat <- cor(meth[, -c(n_meth - 1, n_meth)])

# Keep only the strict upper triangle so every pair is counted exactly once;
# the diagonal and the lower triangle are zeroed.
tmp <- cor_mat
tmp[!upper.tri(cor_mat)] <- 0

# Frobenius norm of the upper triangle, i.e. sqrt(sum(tmp^2)).
Fnorm <- norm(tmp, type = "F")

# Correlation strength c: the norm divided by the square root of the number
# of distinct pairs, (n^2 - n) / 2, which is the root mean square of the
# pairwise correlations.
n_cpg <- dim(cor_mat)[1]
c <- Fnorm / sqrt((n_cpg^2 - n_cpg) / 2)

# Stronger correlation -> smaller alpha.
alpha <- 10^(-c) # 0.2761701

In [None]:
# The penalised regression model was run 100 times and the best-fit lambda
# values (lambda.min and lambda.1se) of every run were extracted into a
# tab-separated file.
lambda_file <- "data/lambda_selection_100_elastic_long.txt"

# Header row; each run appends one "lambda.min<TAB>lambda.1se" line below it.
write.table(cbind("lambda.min", "lambda.1se"), file = lambda_file,
            quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)

# Take predictors and outcome from the SAME complete-case rows. Dropping NAs
# from the full table for x but only from the outcome column for y would leave
# x and y with different lengths (or misaligned rows) whenever a predictor is
# missing in a row whose outcome is observed.
complete_data <- na.omit(data)

# Logical mask instead of -grep(): a negative index built from an empty grep()
# result would select zero columns.
is_outcome <- grepl("residuals.dex.human", colnames(complete_data))
x <- as.matrix(complete_data[, !is_outcome, drop = FALSE]) # exclude residuals
y <- complete_data$residuals.dex.human
stopifnot(nrow(x) == length(y))

# Elastic-net mixing parameter passed to glmnet (1 = lasso, 0 = ridge).
alpha_value <- alpha

# x, y and alpha_value do not change between runs; only the random fold
# assignment inside cv.glmnet() differs, so they are built once, not per run.
for (i in seq_len(100)) {
  cvfit <- cv.glmnet(x, y, alpha = alpha_value, standardize = TRUE)
  write.table(cbind(cvfit$lambda.min, cvfit$lambda.1se), file = lambda_file,
              append = TRUE, quote = FALSE, sep = "\t",
              col.names = FALSE, row.names = FALSE)
}

In [None]:
# Final model: cv.glmnet with its default settings (10-fold cross-validation).
# Coefficients are read off at the average lambda.1se of the 100 stored runs.
# `x`, `y` and `alpha` are expected to exist already from the earlier cells.
lambda <- read.delim("data/lambda_selection_100_elastic_long.txt",
                     header = TRUE)
cvfit <- cv.glmnet(x, y, alpha = alpha, standardize = TRUE)

# Coefficient column evaluated at the averaged penalty.
mean_lambda_1se <- mean(lambda$lambda.1se)
coef.fit <- coef(cvfit, s = mean_lambda_1se)

# Positions of the coefficients that were not shrunk to exactly zero.
index.fit <- which(coef.fit[, 1] != 0)

# Names of the selected predictors, without the intercept term.
selected_names <- row.names(coef.fit)[index.fit]
variables <- selected_names[!is.element(selected_names, "(Intercept)")]

# Non-zero coefficient values (still including the intercept if it is non-zero).
coef.value <- coef.fit[index.fit, ]
#write.table(coef.value[-1], "data/EpiScore_Cpgs_n24.txt", row.names = T,quote = F,col.names = F)
