In [1]:
library(MASS)

Some simulation functions
* Parameters: p and imp_mod (the important modules, given as a vector). For example, p=400 and imp_mod = c(1,4) is our original setting: the true features are 1,2,3,301,302,303 (groups 1 and 4). p should be a multiple of 100. Note that the final group is always the grey group (its features are independent) and all other groups are correlated ones. If a group is specified in imp_mod, the first three features of that group are important and their contribution to the response is given by $5x_{1}+2x_{2}+2x_{3}+5x_{2}x_{3}$. If p=100, then all the features are independent.
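* Function f_sim(n,T,X_data,imp_mod): implementation of the above formula, summed over the modules in imp_mod and evaluated on the n*T rows of the feature matrix X_data.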
* Function sim_time(n,T,p,imp_mod,cor_feature,var_re,var_noise,a1,a2): simulates the full data set and includes a treatment-by-time interaction. When treatment is 1, the contribution of time is given by $a_{1}(t-med)^{2}$, and when treatment is 2 it is $a_{2}(t-med)^{2}$, where $a_{1}>0$, $a_{2}<0$ and med = median(1:T). The remaining parameters are n (number of patients), cor_feature (the covariance between features within a correlated module), var_re (the variance of the random effect: for each patient, a random intercept is drawn from a normal distribution with mean 0) and var_noise (the variance of the noise for each observation, which is iid normal with mean 0).

In [2]:
# Signal contributed to the response by the important feature modules.
#
# Arguments:
#   n, T     only their product is used: the result has length n * T.
#   X_data   two-dimensional object (matrix or data frame) indexed by column
#            position; module m occupies columns (m - 1) * 100 + 1 onwards.
#   imp_mod  vector of important module indices; may be empty.
#
# Returns a numeric vector holding, for every row, the sum over the modules in
# imp_mod of 5*x1 + 2*x2 + 2*x3 + 5*x2*x3, where x1, x2, x3 are the first
# three columns of that module.
f_sim <- function(n, T, X_data, imp_mod) {
  signal <- rep(0, n * T)
  for (module in imp_mod) {
    first_col <- (module - 1) * 100 + 1
    x1 <- X_data[, first_col]
    x2 <- X_data[, first_col + 1]
    x3 <- X_data[, first_col + 2]
    # Three main effects plus one pairwise interaction.
    signal <- signal + 5 * x1 + 2 * x2 + 2 * x3 + 5 * x2 * x3
  }
  signal
}

# Simulate longitudinal data with module-structured features, a per-patient
# random intercept and a treatment-by-time interaction.
#
# Arguments:
#   n            number of patients (>= 1).
#   T            number of time points per patient (>= 1).
#   p            number of features; must be a positive multiple of 100.
#                Features are split into p / 100 modules of 100 features each.
#   imp_mod      vector of important module indices, each in 1..(p / 100);
#                passed on to f_sim() to build the feature signal.
#   cor_feature  off-diagonal covariance inside every module except the last
#                one (unit variances); the last module is independent.
#   var_re       variance of the per-patient random intercept.
#   var_noise    variance of the per-observation noise.
#   a1, a2       coefficients of (time - median(1:T))^2 under treatment 1 and
#                treatment 2 respectively.
#
# Returns a data frame with n * T rows (patient by patient, time 1..T within
# each patient) and columns V1..Vp, rand_int, time, treatment (factor),
# patient and y.
#
# Rows 1..floor(n * T / 2) receive treatment 1 and the remaining rows
# treatment 2. NOTE: when n is odd the split can fall inside one patient's
# block of rows; this mirrors the historical row-based split.
sim_time <- function(n, T = 5, p = 400, imp_mod, cor_feature = 0.8, var_re = 1,
                     var_noise = 1, a1 = 5, a2 = -5) {
  p0 <- 100            # features per module
  p_mult <- p / p0     # number of modules
  if (p_mult %% 1 != 0) {
    stop("p should be a multiple of 100")
  }
  if (p_mult < 1) {
    stop("p should be a positive multiple of 100")
  }
  if (n < 1 || T < 1) {
    stop("n and T should both be at least 1")
  }
  # Fail early with a clear message instead of an obscure subscript error (or
  # a silent selection of the wrong columns) inside f_sim().
  if (anyNA(imp_mod) || any(imp_mod %% 1 != 0) ||
      any(imp_mod < 1) || any(imp_mod > p_mult)) {
    stop("imp_mod should contain module indices between 1 and p/100")
  }
  n_obs <- n * T

  # Block-diagonal covariance between features: every module but the last is
  # equicorrelated, the last one is independent. With a single module the loop
  # is skipped and the result is the identity matrix.
  cov_feature <- matrix(0, nrow = p, ncol = p)
  cov_star <- matrix(cor_feature, nrow = p0, ncol = p0)
  diag(cov_star) <- 1
  for (k in seq_len(p_mult - 1)) {
    block <- ((k - 1) * p0 + 1):(k * p0)
    cov_feature[block, block] <- cov_star
  }
  last_block <- ((p_mult - 1) * p0 + 1):(p_mult * p0)
  cov_feature[last_block, last_block] <- diag(p0)

  # Feature matrix; observations are iid. mvrnorm() drops to a plain vector
  # when a single observation is requested, so restore the matrix shape.
  features <- matrix(mvrnorm(n = n_obs, mu = rep(0, p), Sigma = cov_feature),
                     nrow = n_obs, ncol = p)
  data <- data.frame(features)
  names(data) <- paste0("V", seq_len(p))

  # Random intercept: one draw per patient, repeated over that patient's rows.
  b <- mvrnorm(n = 1, mu = rep(0, n), Sigma = diag(x = var_re, nrow = n))
  data$rand_int <- rep(b, each = T)

  data$time <- rep(1:T, n)

  # Treatment 1 for the first half of the rows, 2 for the rest. Building the
  # full vector in one go also works when n * T is odd, where assigning the
  # two halves by fractional index used to fail.
  n_first <- floor(n_obs / 2)
  data$treatment <- factor(rep(c(1, 2), times = c(n_first, n_obs - n_first)))

  # Patient identifier.
  data$patient <- rep(1:n, each = T)

  # Independent observation noise.
  noise <- mvrnorm(n = 1, mu = rep(0, n_obs),
                   Sigma = diag(x = var_noise, nrow = n_obs))

  # Response: feature signal + treatment-specific quadratic time effect
  # + random intercept + noise.
  med <- median(1:T)
  data$y <- (f_sim(n = n, T = T, X_data = data[seq_len(p)], imp_mod = imp_mod) +
    (data$treatment == 1) * a1 * (data$time - med)^2 +
    (data$treatment == 2) * a2 * (data$time - med)^2 + data$rand_int + noise)

  data
}

# Sample run

In [5]:
# Original setting: 100 patients, signal in modules 1 and 4, no observation
# noise; all other sim_time() arguments keep their defaults.
set.seed(100)
n <- 100
imp_mod <- c(1, 4)
var_noise <- 0
data <- sim_time(n = n, imp_mod = imp_mod, var_noise = var_noise)
head(data)

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V396,V397,V398,V399,V400,rand_int,time,treatment,patient,y
-0.919397,0.4416402,-1.6874364,-1.228292,-1.471919,-0.7258681,-1.000708,-0.5809589,-0.6480068,-1.0717893,...,1.0363352,-0.0749961081,0.7390165,1.3488629,-1.15259948,0.7452864,1,1,1,8.466967
-0.8763958,-0.9931904,-0.5903482,-0.893897,-1.2517455,-0.4072832,-1.8738281,-1.2849527,-1.0130168,-0.3346373,...,-0.1901736,-0.9470827924,-2.1501213,-0.2297432,0.46416071,0.7452864,2,1,1,-3.849688
-0.604631,-0.3771163,-0.8634822,-0.9587687,-0.8106843,0.2184991,-1.4998981,-0.8083261,-0.2937366,-0.7714238,...,1.2566406,0.0003758095,0.8217849,-1.7324848,0.72893247,0.7452864,3,1,1,7.850598
0.1238469,-1.4196072,-0.6497125,-0.6689533,-1.2245386,-1.3911706,-0.3050343,-0.956755,-1.2421426,-0.8961609,...,0.823516,-1.5249574007,1.6537538,-0.627309,0.41061745,0.7452864,4,1,1,6.635803
-1.5415618,-0.9090509,-0.5699499,-1.4404816,-0.9707221,-1.2020829,-1.1336156,-0.1288167,-1.5646634,-0.2275436,...,-1.1591249,0.0983516474,-0.5823098,-0.2405078,-1.4611472,0.7452864,5,1,1,6.361511
-0.3119151,-0.7531623,-0.3931515,-0.3771317,-0.7265423,-0.6114486,-1.1590108,-0.7932671,-0.8573822,-0.5079328,...,-1.1442965,-1.5266078213,-0.7743898,-1.3375772,-0.01890809,0.5078654,1,1,2,15.281902
