In [1]:
# install.packages('refineR')

In [2]:
library('refineR')

# path to a directory holding CSVs with CA-125 values for each age group
dir <- '../data/age_binned/'

# Keep only the CSV inputs. Without the pattern filter every directory entry
# (sub-directories, checkpoint files, earlier outputs) would end up in
# file_list and be handed to read.csv() by the fitting loop.
file_list <- list.files(path = dir, pattern = '\\.csv$', ignore.case = TRUE)
file_list <- file.path(dir, file_list)

# check file list
print(file_list)


 [1] "../data/age_binned//data_18.csv" "../data/age_binned//data_25.csv"
 [3] "../data/age_binned//data_30.csv" "../data/age_binned//data_35.csv"
 [5] "../data/age_binned//data_40.csv" "../data/age_binned//data_45.csv"
 [7] "../data/age_binned//data_50.csv" "../data/age_binned//data_55.csv"
 [9] "../data/age_binned//data_60.csv" "../data/age_binned//data_65.csv"
[11] "../data/age_binned//data_70.csv" "../data/age_binned//data_75.csv"
[13] "../data/age_binned//data_80.csv" "../data/age_binned//data_85.csv"


In [3]:
# Fit a refineR model to every file in file_list, print a short summary per
# file, and write each estimated reference interval to the working directory.
#
# execution_times collects the fitting time of every processed file, in
# seconds, in the order the files are processed.
execution_times <- numeric()

# Lower edges of the age bins. The loop no longer indexes into this vector
# (see the note on the output name below); it is kept in case later cells
# reference it.
bins_5 <- c(18, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85)

for (file in sort(file_list)) { # loop file list

    data <- read.csv(file)$value # read the 'value' column

    # files with 10 or fewer values are skipped
    if (length(data) <= 10) {
        next
    }

    start_time <- Sys.time()

    fit <- findRI(data, NBootstrap=200, model='BoxCox') # fit the refineR model
    result <- getRI(fit, RIperc = c(0, 0.99), CIprop=0.95, Scale='original')

    end_time <- Sys.time()
    # Force seconds: a plain `end_time - start_time` picks its own units, and
    # cat() drops them, so a 3-minute fit would print the same as a 3-second one.
    iteration_time <- as.numeric(difftime(end_time, start_time, units = 'secs'))
    execution_times <- c(execution_times, iteration_time)

    cat(file,'\n') # print data stats
    cat(length(data), " values\n")
    cat(result$PointEst,'\n') # print estimated RI
    cat('max value: ', max(data), '\n\n') # print max data value
    # trailing newlines keep the next file's summary from running into this line
    cat('iteration time: ', iteration_time, '\n\n') # processing time in seconds

    # write results
    # The output name is derived from the input file name ('data_18.csv' ->
    # 'data_18_99p') instead of a separately maintained counter into bins_5.
    # A counter that only advances for processed files mislabels every file
    # that follows a skipped one.
    out_name <- paste0(tools::file_path_sans_ext(basename(file)), '_99p')
    write.csv(result, file = out_name, row.names = FALSE)

}

cat(execution_times, '\n')


../data/age_binned//data_18.csv 
3015  values
0 43.65976 
max value:  140.9 

iteration time:  3.46289../data/age_binned//data_25.csv 
2446  values
0 43.4103 
max value:  126.1 

iteration time:  3.546556../data/age_binned//data_30.csv 
2728  values
0 48.22432 
max value:  137.6 

iteration time:  3.505222../data/age_binned//data_35.csv 
3224  values
3.346995e-153 41.92052 
max value:  136 

iteration time:  3.215402../data/age_binned//data_40.csv 
4187  values
0 49.03958 
max value:  138.2 

iteration time:  2.966628../data/age_binned//data_45.csv 
4404  values
0 37.93246 
max value:  136.1 

iteration time:  3.062442../data/age_binned//data_50.csv 
4055  values
0 33.5243 
max value:  135.8 

iteration time:  3.014552../data/age_binned//data_55.csv 
3311  values
0 29.85936 
max value:  141 

iteration time:  3.112689../data/age_binned//data_60.csv 
2958  values
0 27.67604 
max value:  140.5 

iteration time:  3.219714../data/age_binned//data_65.csv 
1857  values
0 28.42058 
max value:

 Data has small sample size (N < 1000). Evaluate results carefully.



../data/age_binned//data_80.csv 
569  values
2.536967e-73 32.81338 
max value:  140.9 

iteration time:  8.677935

 Data has small sample size (N < 1000). Evaluate results carefully.



../data/age_binned//data_85.csv 
415  values
2.1024e-46 35.26406 
max value:  140 

iteration time:  6.791713