# See how ``run_experiments`` works based on a single experiment

In [2]:
devtools::load_all("../occCompaRe")
devtools::load_all(".")

Loading occCompare
Loading occCompareExp


## Setup experiments

The experiments must be set up properly by defining the classifier comparison settings list (`parcc`). Furthermore, a function (`get_refset()`) must be available which creates the reference sets as required by the functions of occCompaRe.

If in doubt about how this can be done, see .002_setup.

In [3]:
# Create the settings list for the "ocsvmOnly4revision1" experiment.
# NOTE: default paths are defined in there which you might need to adjust.
parcc <- set_expOpts("ocsvmOnly4revision1")
# parcc  # uncomment to inspect the settings

## Variables defined in ``run_experiments``


In [4]:
# Pull the reference-set generator and its arguments out of the settings.
# NOTE(review): the key is spelled "get_resfet" (not "get_refset") - confirm
# that this matches the element name created by set_expOpts(). `[[` on a list
# returns NULL for a missing name, so a misspelling would go unnoticed here.
get_refset <- parcc[["get_resfet"]]
args.rs <- parcc[["args.rs"]]

## Select one TASK

In [5]:
# Print the `dirData` entry of the reference-set arguments for inspection.
args.rs$dirData

#### Arguments

In [6]:
# Argument values used for this single-task walkthrough.
fun <- ".trainUtest"
doPar <- TRUE
# nCores is not set here; it is used from parcc$nCores.
debug <- FALSE
overwrite <- FALSE
overwrite_resTable <- FALSE
loadOnly <- FALSE

In [7]:
# Settings elements whose position is reported by print.fun().
loopElements <- c("seed", "nP", "nU", "fset", "idP", "method")

#' Print the position of the current task within each loop parameter
#'
#' Writes one line of the form "seed:1/10 | nP:1/1 | ..." to the console.
#'
#' @param loopElements Character vector with the names of the elements to
#'   report.
#' @param task List holding the currently selected value of each element.
#' @param parcc List holding all candidate values of each element.
#' @return `NULL`, invisibly (the result of `cat()`).
print.fun <- function(loopElements, task, parcc) {
  at_from <- function(pm) {
    # match()[1] always yields exactly one position: NA when the task value is
    # absent or missing, the first hit when candidate values are duplicated.
    # which(a == b) could return zero or several positions and break the
    # one-string-per-element output.
    at <- match(task[[pm]], parcc[[pm]])[1]
    sprintf("%s/%s", at, length(parcc[[pm]]))
  }
  progress <- vapply(loopElements, at_from, character(1))
  cat(paste(paste(loopElements, progress, sep = ":", collapse = " | "), "\n"))
}

In [8]:
# Build the task from the first value of every looped setting. The lines
# marked LOOP are the ones that vary from task to task.
task <- parcc
task$seed <- parcc$seed[1] # LOOP
task$nP <- parcc$nP[1]     # LOOP
task$nU <- parcc$nU[1]     # LOOP
task$fset <- parcc$fset[1] # LOOP

# Reference set for this seed / feature set / sample-size combination.
rs_allTr <- get_refset(seed = task$seed,
                       fset = task$fset,
                       nP = task$nP, nU = task$nU,
                       args.rs)

task$idP <- parcc$idP[1]         # LOOP

task$scaling <- parcc$scaling[1] # LOOP

# Remove the training samples of the non-idP classes: keep every "te" row
# plus the rows whose y is 0 or idP.
rs <- rs_allTr[rs_allTr$set == "te" | rs_allTr$y %in% c(0, task$idP), ]

# isTRUE() makes a NULL or NA scaling value reach the informative stop()
# below instead of failing inside `if` with "argument is of length zero".
if (!isTRUE(task$scaling == "ccs01")) {
  stop("Currently only ccs01 scaling is supported.")
}

# Class-specific scaling using preProcess: the range transformation is fitted
# on the idP training rows only. Columns 1:2 are excluded from the fit.
idx_scale <- rs$set == "tr" & rs$y == task$idP
pp <- preProcess(rs[idx_scale, -(1:2)], method = "range")
# This is a fast solution...
if ("binsvm" %in% parcc$method) {
  # Supervised variant: all rows with y != 0, labelled idP vs. rest.
  rs_sup <- predict(pp, rs_allTr[rs_allTr$y != 0, ])
  # print(check_refset(rs_sup))
  rs_sup$y <- puFactor(rs_sup$y == task$idP, TRUE)
}
rs[, -(1:2)] <- predict(pp, rs[, -(1:2)])

# Row indices of the idP training samples; must be computed before y is
# recoded on the next line.
idx4sigest <- which(rs$set == "tr" & rs$y == task$idP)

rs$y <- puFactor(rs$y == task$idP, TRUE)

In [9]:
# Select the first method for this walkthrough.
task$method <- parcc$method[1] # LOOP

cat("\n\n*********************************\n")
print.fun(loopElements, task, parcc)

# Full tuning grid for the selected method, built from the rows indexed by
# idx4sigest with the first two columns dropped.
tuneGrid.bak <- get_tuneGrid(
  rs[idx4sigest, -(1:2)],
  method = task$method,
  param = task$param[[task$method]],
  seed = task$seed
)



*********************************
seed:1/10 | nP:1/1 | nU:1/1 | fset:1/3 | idP:1/8 | method:1/1 


In [12]:
# Run each experiment function for the selected task. Entries have the form
# "<fun>" or "<fun>-<resampling>"; the part after "-" is stored in
# task$resampling.
for (ff in c(".trainUtest", ".resampling-val")) {
  cat("------------------------------------\n")
  # Only .trainUtest is run for binsvm. The message is printed BEFORE `next`;
  # placed after it, the message could never be reached.
  if (ff != ".trainUtest" && task$method %in% c("binsvm")) {
    cat(paste0("Skipping ", ff, " for method ", task$method, ".\n"))
    next
  }

  # All rows are used; the former PU-sample removal is disabled.
  idx_rs <- !logical(nrow(rs))
  stsp <- strsplit(ff, "-")[[1]]
  ff <- stsp[1]
  if (length(stsp) == 1) { # => .trainUtest
    # idx_rs[rm_ifNotPuResampl] <- FALSE
    cat("PREV. removing PU - samples. Now not anymore!\n")
  } else {
    task$resampling <- stsp[2]
    stsp.r <- strsplit(task$resampling, "_")[[1]]
    if (length(stsp.r) == 1) { # => normal resampling
      # idx_rs[rm_ifNotPuResampl] <- FALSE
      cat("PREV. removing PU - samples. Now not anymore!\n")
    }
  }

  summaryFile <- get_summaryFile(task$dn.res, task, ff)
  if (overwrite_resTable) {
    unlink(summaryFile)
  }

  # Default: every model of the grid still has to be computed.
  tuneGrid <- tuneGrid.bak
  if (file.exists(summaryFile) && !overwrite && !loadOnly) {
    # An unreadable summary file (e.g. empty after an interrupted run) is
    # reported and treated as "nothing done" instead of failing later in
    # matchDf() with an unrelated error.
    tasks_done <- tryCatch(
      read.csv(summaryFile, header = TRUE), # was header=F!
      error = function(e) {
        warning("Could not read ", summaryFile, ": ", conditionMessage(e),
                call. = FALSE)
        NULL
      }
    )
    if (!is.null(tasks_done)) {
      # colnames(tasks_done)[1:length(task$param[[task$method]])] <-
      #   names(task$param[[task$method]])
      idx <- matchDf(tuneGrid.bak, tasks_done)
      # Keep the grid rows without a match in the summary table. Logical
      # indexing does not depend on the row names being 1..n, and
      # drop = FALSE keeps a data frame even for a one-column grid.
      tuneGrid <- tuneGrid.bak[is.na(idx), , drop = FALSE]
    }
  }

  cat(ff, ":", basename(summaryFile), "\n")
  cat("Number of models - all/to do:",
      nrow(tuneGrid.bak), " / ", nrow(tuneGrid), "\n")

  if (nrow(tuneGrid) > 0) {
    if (substr(task$method, 1, 3) == "bin") {
      # NOTE(review): rs_sup is only created when "binsvm" is among
      # parcc$method - confirm for other methods starting with "bin".
      rs_run_exp <- rs_sup
    } else {
      rs_run_exp <- rs[idx_rs, ]
    }
    # NOTE(review): rm_ifNotPuResampl is not defined in the visible part of
    # this notebook; it is passed on unchanged. Confirm that it exists or that
    # run_exp() never evaluates it.
    re <-
      run_exp(rs_run_exp,
              looppar = task,
              tuneGrid = tuneGrid,
              fun = ff,
              doPar = doPar,
              loadOnly = loadOnly,
              overwrite = overwrite,
              rm_ifNotPuResampl = rm_ifNotPuResampl)
    # inherits() also works when `re` carries more than one class;
    # class(re) == "try-error" would then hand a vector to if().
    if (inherits(re, "try-error")) {
      print(">>>>>> ERROR IN trainUtest!")
    }
  }
}

------------------------------------
PREV. removing PU - samples. Now not anymore!
.trainUtest : seed1_fsetre3b_nP50_nU10000_idP1_methodocsvm.trainUtest.csv 
Number of models - all/to do: 35  /  0 
------------------------------------
PREV. removing PU - samples. Now not anymore!
.resampling : seed1_fsetre3b_nP50_nU10000_idP1_methodocsvm.resampling_val.csv 
Number of models - all/to do: 35  /  35 
From 100% (n=35) done [%]: 0.10.20.30.40.50.60.70.80.90.100.
