In [1]:
library(rjson)
library(ggplot2)
library(viridis)

Loading required package: viridisLite



In [2]:
# Data sets to process; each name is used as a directory component when the
# result file paths are built.
data_name_range <- c(
    "ATAC_JV_adipose",
    "CTCF_TDH_ENCODE",
    "H3K27ac-H3K4me3_TDHAM_BP",
    "H3K27ac_TDH_some",
    "H3K36me3_AM_immune",
    "H3K27me3_RL_cancer",
    "H3K27me3_TDH_some",
    "H3K36me3_TDH_ENCODE",
    "H3K36me3_TDH_immune",
    "H3K36me3_TDH_other"
)

In [3]:
#' Collect run times for one data set and model.
#'
#' For every entry of `n_hyper_searches` the matching run-time JSON file under
#' `../../../../result/<data_name>/<model>/` is read with `fromJSON()`. Each
#' key of that JSON object is split on "_" into a fold label and a
#' distribution label; the value is taken as the run time.
#'
#' @param data_name Data set name, used as a directory component.
#' @param model Model name, used as a directory component and as the prefix of
#'   the `models` column.
#' @param n_hyper_searches Vector of hyper-parameter search settings. Only 0,
#'   1, 2 and 5 have an associated file; any other value is an error.
#' @return A data frame with columns `folds`, `models`, `attribute` (run
#'   time), `attribute_type` (always "Run Time in secs"), `data` and
#'   `n_hyper`, with the rows for all requested settings stacked in order.
#'   An empty data frame when `n_hyper_searches` is empty.
get_run_time_data <- function(data_name, model, n_hyper_searches) {
    # File name used for each supported search setting.
    run_time_files <- c(
        "0" = "run_dis_time_0_param.json",
        "1" = "run_dis_time_1_param_grid_search.json",
        "2" = "run_dis_time_2_param_grid_search.json",
        "5" = "run_dis_time_tuning.json"
    )

    read_one <- function(n_hyper_search) {
        key <- as.character(n_hyper_search)
        # Fail loudly instead of silently re-reading the previous file.
        if (!(key %in% names(run_time_files))) {
            stop("Unsupported n_hyper_search: ", key,
                 " (expected one of ",
                 paste(names(run_time_files), collapse = ", "), ")",
                 call. = FALSE)
        }
        file_name <- paste0("../../../../result/", data_name, "/", model, "/",
                            run_time_files[[key]])
        run_time_model <- fromJSON(file = file_name)

        keys <- names(run_time_model)
        n <- length(keys)
        # Keys are read as "<fold>_<distribution>"; anything else would
        # misalign the fold and distribution columns.
        parts <- strsplit(keys, "_")
        if (any(lengths(parts) != 2L)) {
            stop("Unexpected key format in ", file_name,
                 "; expected '<fold>_<distribution>'", call. = FALSE)
        }
        folds <- vapply(parts, `[`, character(1), 1L)
        distri <- vapply(parts, `[`, character(1), 2L)

        data.frame(
            "folds" = folds,
            "models" = paste(model, distri, sep = "_"),
            "attribute" = as.numeric(run_time_model),
            "attribute_type" = rep("Run Time in secs", n),
            "data" = rep(data_name, n),
            "n_hyper" = rep(n_hyper_search, n)
        )
    }

    frames <- lapply(n_hyper_searches, read_one)
    if (length(frames) == 0) {
        return(data.frame())
    }
    # Bind once at the end rather than growing a data frame inside a loop.
    do.call(rbind, frames)
}

In [4]:
#' Collect accuracies for one data set and model.
#'
#' For every entry of `n_hyper_searches` and each of the distributions
#' "normal", "logistic" and "extreme", the matching accuracy JSON file under
#' `../../../../result/<data_name>/<model>/` is read with `fromJSON()`. The
#' names of the JSON object become the fold labels and the values the
#' accuracies.
#'
#' @param data_name Data set name, used as a directory component.
#' @param model Model name, used as a directory component and as the prefix of
#'   the `models` column.
#' @param n_hyper_searches Vector of hyper-parameter search settings. Only 0,
#'   1, 2 and 5 have an associated file; any other value is an error.
#' @return A data frame with columns `folds`, `models`, `attribute`
#'   (accuracy), `attribute_type` (always "Accuracy"), `data` and `n_hyper`,
#'   ordered by search setting and then by distribution. An empty data frame
#'   when `n_hyper_searches` is empty.
get_accuracy_data <- function(data_name, model, n_hyper_searches) {
    distributions <- c("normal", "logistic", "extreme")
    # File-name ending used for each supported search setting.
    accuracy_suffixes <- c(
        "0" = "_param_0.JSON",
        "1" = "_param_1_grid_search.JSON",
        "2" = "_param_2_grid_search.JSON",
        "5" = ".JSON"
    )

    read_search <- function(n_hyper_search) {
        key <- as.character(n_hyper_search)
        # Fail loudly instead of silently re-reading the previous file.
        if (!(key %in% names(accuracy_suffixes))) {
            stop("Unsupported n_hyper_search: ", key,
                 " (expected one of ",
                 paste(names(accuracy_suffixes), collapse = ", "), ")",
                 call. = FALSE)
        }
        per_distribution <- lapply(distributions, function(distribution) {
            file_name <- paste0("../../../../result/", data_name, "/", model,
                                "/accuracy_", distribution,
                                accuracy_suffixes[[key]])
            accuracy <- fromJSON(file = file_name)
            folds <- names(accuracy)
            n_fold <- length(folds)
            data.frame(
                "folds" = folds,
                "models" = rep(paste(model, distribution, sep = "_"), n_fold),
                "attribute" = as.numeric(accuracy),
                "attribute_type" = rep("Accuracy", n_fold),
                "data" = rep(data_name, n_fold),
                "n_hyper" = rep(n_hyper_search, n_fold)
            )
        })
        do.call(rbind, per_distribution)
    }

    frames <- lapply(n_hyper_searches, read_search)
    if (length(frames) == 0) {
        return(data.frame())
    }
    # Bind once at the end rather than growing a data frame inside a loop.
    do.call(rbind, frames)
}

In [5]:
# Model whose results are plotted, and the hyper-parameter search settings
# passed to the data-loading functions.
model <- "xgboost"
n_hyper_searches <- c(0, 1, 2, 5)

In [7]:
# For every data set, stack its run-time and accuracy tables and write one PDF
# with a point plot faceted by (n_hyper, data) rows and attribute_type columns.
# Iterating with seq_along() keeps the loop in step with data_name_range
# instead of relying on a hard-coded list of indices.
for (i in seq_along(data_name_range)) {
    data_name          <- data_name_range[i]
    run_data_iter      <- get_run_time_data(data_name, model, n_hyper_searches)
    accuracy_data_iter <- get_accuracy_data(data_name, model, n_hyper_searches)
    combined_data      <- rbind(run_data_iter, accuracy_data_iter)

    p <- ggplot(combined_data,
                aes(x = attribute, y = models, colour = factor(folds))) +
        geom_point(size = 5) +
        labs(color = 'Folds') +
        facet_grid(n_hyper + data ~ attribute_type, scales = "free") +
        theme_bw()

    fig_name <- paste0('../../../../result/combined/combined_', i,
                       '_hyper_search.pdf')
    pdf(fig_name)
    # Always close the PDF device, even if rendering the plot fails, so a
    # failed iteration does not leave a dangling graphics device behind.
    tryCatch(print(p), finally = dev.off())
}