In [7]:
source("../Data Generator.r")
library(fuzzyforest)
library(WGCNA)

In [8]:
### training, validation and test set ###
# Settings passed to sim_time() for both the training and the test draw.
n <- 200
p <- 400
imp_mod <- c(1, 4)
var_noise <- 1
var_re <- 3

# Training data, generated under its own fixed seed.
set.seed(100)
data <- sim_time(
  n = n, p = p, imp_mod = imp_mod, var_noise = var_noise,
  a1 = 0, a2 = 0, var_re = var_re
)

# test set (used for testing performance using optimal parameters);
# drawn with the same settings but a different seed and sample size.
set.seed(101)
n_test <- 100
data_test <- sim_time(
  n = n_test, p = p, imp_mod = imp_mod, var_noise = var_noise,
  a1 = 0, a2 = 0, var_re = var_re
)

In [9]:
n_run <- 50 # how many times FF is run on the data set
n_top <- 10 # how many top-ranked variables are selected in each run
# Zero-filled table that collects the simulation results: one row per run
# plus a final row for the averages; columns V1..Vp are followed by "error".
result_ff <- data.frame(matrix(0, nrow = n_run + 1, ncol = p + 1))
names(result_ff) <- c(paste0("V", 1:p), "error")

In [18]:
# Fit a fuzzy forest (wff) on the training data once per repetition and
# record, in row `Repeat` of result_ff, which of V1..Vp were selected
# (1 = selected, 0 = not) together with the test-set mean squared error.
# The whole batch is wrapped in system.time() to report the elapsed time.
system.time({
  # The predictor names and the model formula are the same for every
  # repetition, so they are built once instead of on each pass.
  var <- paste0("V", seq_len(p))
  Formula <- as.formula(paste("y~", paste(var, collapse = "+")))

  # NOTE(review): only repetitions 19 and 20 are run by this cell although
  # result_ff has n_run rows; presumably the remaining rows are filled by
  # re-running the cell with other ranges -- confirm before averaging.
  for (Repeat in 19:20) {
    set.seed(Repeat + 32) # change seed each loop

    ff <- wff(Formula,
      data = data,
      screen_params = screen_control(min_ntree = 500, keep_fraction = 0.05),
      select_params = select_control(min_ntree = 500, number_selected = n_top),
      final_ntree = 1000, num_processors = 1
    )

    # First column of the fitted object's feature list; it is matched against
    # the V1..Vp names below (presumably the selected feature names).
    top_variables <- ff$feature_list[, 1]

    # error on the test set (mean squared prediction error)
    preds <- predict(ff, new_data = data_test)
    error <- mean((data_test$y - preds)^2)

    # If variable was selected as important, indicate with 1 (otherwise 0).
    # One vectorized membership test fills all p indicator columns at once.
    result_ff[Repeat, seq_len(p)] <- as.numeric(var %in% top_variables)
    result_ff[Repeat, p + 1] <- error

    # show the progress; flush after printing so the line appears right away
    cat(Repeat, "\n")
    flush.console()
  }
})

19 
20 


   user  system elapsed 
 686.89    1.75  696.50 

In [22]:
# Put the column-wise averages over the n_run repetition rows into the extra
# last row, then export the whole table to "ff_n<n>.csv".
result_ff[n_run + 1, ] <- colMeans(result_ff[seq_len(n_run), ])
name <- paste0("ff_n", n, ".csv")
write.csv(result_ff, file = name)

In [23]:
# Print the results table (selection indicators V1..Vp plus the "error" column).
result_ff

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V392,V393,V394,V395,V396,V397,V398,V399,V400,error
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,33.45325
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.16724
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.37026
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.57654
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.50443
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,33.75795
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.23275
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.1346
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.19166
1.0,1.0,1.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,34.50638


In [21]:
# sort(result_ff[n_run+1,][1:(p)],index.return=TRUE,decreasing = TRUE)[1:20]