In [None]:
library(caret)
library(GGally)
library(glmnet)
library(pheatmap)

In [None]:
source("modelpipe.R")

In [None]:
source("MaclearnUtilities.R")

In [None]:
## -----------------------------------------------------------------
## linear regression simulated example
## -----------------------------------------------------------------
## 15 observations of 4 standard-normal features, named A through D
x <- setNames(
    data.frame(matrix(rnorm(60), nrow = 15, ncol = 4)),
    LETTERS[1:4]
)
## overwrite B with A plus a small multiple of the original B, so that
## columns A and B end up almost perfectly collinear
x[["B"]] <- x[["A"]] + 0.01 * x[["B"]]

In [None]:
## response is feature D plus independent standard-normal noise;
## the remaining columns of x do not enter into y
y <- x[["D"]] + rnorm(nrow(x))

In [None]:
## ordinary least squares fit of y on every column of x;
## the coefficient vector is displayed as the cell output
linmod <- lm(formula = y ~ ., data = x)
coef(linmod)

In [None]:
## glmnet fit with alpha = 0 (ridge / L2 penalty) at a single fixed
## lambda; glmnet needs a numeric matrix, hence as.matrix()
l2mod <- glmnet(
    x = as.matrix(x),
    y = y,
    alpha = 0,
    lambda = 0.1
)
coef(l2mod)

In [None]:
## glmnet fit with alpha = 1 (lasso / L1 penalty) at a single fixed
## lambda; glmnet needs a numeric matrix, hence as.matrix()
l1mod <- glmnet(
    x = as.matrix(x),
    y = y,
    alpha = 1,
    lambda = 0.1
)
coef(l1mod)

In [None]:
## -----------------------------------------------------------------
## load Hess data
## -----------------------------------------------------------------
## Read a tab-separated table into a data frame.
##   file: path (or connection) handed straight to read.table
## The first line supplies the column names, the first column supplies
## the row names, and column names are kept verbatim (check.names is
## off, so they are not rewritten into syntactic R names).
readTab <- function(file) {
    utils::read.table(
        file = file,
        header = TRUE,
        sep = "\t",
        row.names = 1,
        check.names = FALSE
    )
}

In [None]:
## the training table is transposed after reading, so rows and columns
## of the file swap roles in x (presumably giving samples in rows --
## TODO confirm against the file layout); column names are kept verbatim
x <- data.frame(
    t(readTab("microarray/Hess/HessTrainingData.tsv.gz")),
    check.names = FALSE
)
annot <- readTab("microarray/Hess/HessTrainingAnnotation.tsv")
## outcome labels taken from the pCRtxt annotation column, named by the
## annotation row names
y <- setNames(annot$pCRtxt, rownames(annot))

In [None]:
## two-stage pipeline: a FastTSelector stage configured with nFeat = 4,
## followed by a GlmFitter stage with alpha = 0 and lambda = 0
## (presumably an unpenalized logistic regression -- confirm against
## the GlmFitter definition)
logisticFitter <- SolderedPipeFitter(
    FastTSelector(nFeat = 4),
    GlmFitter(alpha = 0, lambda = 0)
)
logisticFit <- logisticFitter(x, y)
## coefficients come from the fit stored in the second pipeline element
logisticCoef <- coef(logisticFit[[2]]$fit)

In [None]:
## heatmap of the features used by the logistic fit, with each row
## annotated by its outcome group
## feature names are the coefficient row names minus the intercept;
## drop = FALSE keeps heatX a data frame even if only one feature remains
heatX <- x[, setdiff(rownames(logisticCoef), "(Intercept)"), drop = FALSE]
## one-column annotation table keyed by the names of y
heatY <- data.frame(row.names = names(y), Group = y)
pheatmap(
    heatX,
    annotation_row = heatY,
    ## argument name spelled out in full (annotation_colors) rather than
    ## relying on partial argument matching of "annotation_color"
    annotation_colors = list(Group = c(pCR = "black", RD = "gray")),
    show_rownames = FALSE
)

In [None]:
## pairwise plot matrix of the selected features together with the
## outcome, which is appended as column y
ggpairs(data = data.frame(heatX, y = y))

In [None]:
## -----------------------------------------------------------------
## regularized models
## -----------------------------------------------------------------
## same pipeline shape as the logistic fitter, but the GlmFitter stage
## uses alpha = 0 with a nonzero lambda of 0.05 (L2-penalized)
l2Fitter <- SolderedPipeFitter(
    FastTSelector(nFeat = 4),
    GlmFitter(alpha = 0, lambda = 0.05)
)
l2Fit <- l2Fitter(x, y)
## coefficients come from the fit stored in the second pipeline element
l2Coef <- coef(l2Fit[[2]]$fit)

In [None]:
## same pipeline shape again, with the GlmFitter stage using alpha = 1
## and lambda = 0.05 (L1-penalized)
l1Fitter <- SolderedPipeFitter(
    FastTSelector(nFeat = 4),
    GlmFitter(alpha = 1, lambda = 0.05)
)
l1Fit <- l1Fitter(x, y)
## coefficients come from the fit stored in the second pipeline element
l1Coef <- coef(l1Fit[[2]]$fit)

In [None]:
## -----------------------------------------------------------------
## try with cross-validation
## -----------------------------------------------------------------
## 5-fold cross-validation of the logistic pipeline; the seeds list has
## six entries, all equal to 123
cvLogistic <- train(
    logisticFitter,
    x,
    y,
    trControl = trainControl(
        method = "cv",
        number = 5,
        seeds = rep(list(123), 6)
    )
)
## resampling summary, displayed as the cell output
cvLogistic$results

In [None]:
## 5-fold cross-validation of the L1-penalized pipeline; the seeds list
## has six entries, all equal to 123
cvL1 <- train(
    l1Fitter,
    x,
    y,
    trControl = trainControl(
        method = "cv",
        number = 5,
        seeds = rep(list(123), 6)
    )
)
## resampling summary, displayed as the cell output
cvL1$results

In [None]:
## 5-fold cross-validation of the L2-penalized pipeline; the seeds list
## has six entries, all equal to 123
cvL2 <- train(
    l2Fitter,
    x,
    y,
    trControl = trainControl(
        method = "cv",
        number = 5,
        seeds = rep(list(123), 6)
    )
)
## resampling summary, displayed as the cell output
cvL2$results