/
choose_m.R
92 lines (66 loc) · 2.33 KB
/
choose_m.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Vary N configuration:
# .m <- seq(1e1, 1e2, length.out=10)
# .p <- 5e1
# .N <- c(1e4, 1e5, 1e6)
# .n <- expand.grid(.m,.N) %>% apply(1, function(x) ceiling(x[2]/x[1]))
# .sigma <- 1e1
# TODO: simulate with large p for the high-dimensional scenario.
# Possibly a single plot: fix N, put m on the x-axis, and vary p.
# For large p, the MSE should be linear.
# For small p, the MSE can be nonlinear.
library(InformationAndInference)
# Global settings for the simulation. Wrapping an assignment in parentheses
# also prints the assigned value when the script is run at top level.
.reps <- 1000
.sigma <- 2
.N <- 50000
# Candidate values of m: 10, 20, ..., 100.
(.m <- seq.int(from = 10, to = 100, by = 10))
# One n per m: .N / .m rounded to the nearest integer.
(.n <- round(.N / .m))
.kappa <- 0.2
# Four evenly spaced values of p, from 50 up to .kappa times the smallest n,
# each rounded to the nearest ten.
(.p <- round(
  seq(from = 50, to = .kappa * min(.n), length.out = 4),
  digits = -1
))
.beta.norm <- 10
## OLS
# Build the configurations for the OLS fit from the parameter vectors defined
# above (lambda is NA: no penalty value is supplied for this model).
configurations.000 <- makeConfiguration(
  reps = .reps,
  m = .m, p = .p, n = .n, lambda = NA,
  model = my.ols,
  link = identity,
  sigma = .sigma,
  beta.maker = makeBetasDeterministic,
  beta.norm = .beta.norm,
  beta.star.maker = BetaStarIdentity,
  data.maker = makeRegressionData,
  name = 'ols')
# Tabulate N, rounded to the nearest thousand, to inspect the generated grid.
configurations.000 %>% select(N) %>% round(-3) %>% table
# Keep only the rows whose N, rounded to the nearest hundred, equals .N.
configurations.000 %<>% filter(round(N, -2) == .N)
nrow(configurations.000)

# Create the output directory up front so that the save() calls below cannot
# fail on a missing path after the simulations have already been run.
dir.create('RData', showWarnings = FALSE, recursive = TRUE)

# Serial alternative to the cluster run below:
# MSEs.000 <- apply(configurations.000, 1, replicateMSE)
# attr(MSEs.000, "createdAt") <- Sys.time()

# Use at most 35 workers, and never more than the cores the machine reports
# (if the core count cannot be detected, fall back to 35).
.n.workers <- min(35L, parallel::detectCores(), na.rm = TRUE)
# NOTE(review): rscript_args looks like a setting for workers launched through
# Rscript; it is presumably ignored for type="FORK" -- confirm before removing.
cl <- makeCluster(.n.workers, type="FORK", rscript_args = c("--no-init-file", "--no-site-file", "--no-environ"))
clusterEvalQ(cl, library(InformationAndInference))

# Apply replicateMSE to every row of `configurations` on cluster `cl` and stamp
# the result with its creation time. If the run fails, the cluster is stopped
# before the error is re-raised, so no worker processes are left behind.
.runMSE <- function(cl, configurations) {
  MSEs <- tryCatch(
    parApply(cl, configurations, 1, replicateMSE),
    error = function(e) {
      stopCluster(cl)
      stop(e)
    })
  attr(MSEs, "createdAt") <- Sys.time()
  MSEs
}

MSEs.000 <- .runMSE(cl, configurations.000)
print(object.size(MSEs.000), units="Mb")
# Save the OLS results immediately, so a failure in the ridge run below does
# not discard them.
save(MSEs.000, configurations.000,
     file='RData/MSEs_choose_m.9.RData',compress='bzip2')

## Ridge
.lambda <- 1
# Same grid as for OLS, but with the ridge model, its beta-star maker and a
# fixed lambda.
configurations.001 <- makeConfiguration(
  reps = .reps,
  m = .m, p = .p, n = .n,
  lambda = .lambda,
  model = my.ridge,
  link = identity,
  sigma = .sigma,
  beta.maker = makeBetasDeterministic,
  beta.norm = .beta.norm,
  beta.star.maker = BetaStarRidge,
  data.maker = makeRegressionData,
  name = 'ridge')
configurations.001 %<>% filter(round(N, -2) == .N)
nrow(configurations.001)

# The cluster created for the OLS run is reused here.
MSEs.001 <- .runMSE(cl, configurations.001) # object with raw errors.
save(MSEs.001, configurations.001,
     file='RData/MSEs_choose_m_ridge.9.RData',compress='bzip2')

stopCluster(cl)