In [None]:
import os

import numpy as np
import pandas as pd
from statsmodels.tsa.arima_process import ArmaProcess

In [None]:
def mean_model(X, v, b0=0, c0=0, c1=0):
    """Return the conditional mean exp(eta) of the count model.

    The linear predictor is

        eta = c0*cos(x0) + c0*cos(x1) + c0*cos(x2)
              + c1/(x3**2 + 1) + c1/(x4**2 + 1) + b0 + v

    Parameters
    ----------
    X : 2-D array whose columns 0..4 are read; further columns are ignored.
    v : random-effect term added to the predictor; it should already be
        expanded to one value per row of X (i.e. ``v_rep`` of dimension N).
    b0 : intercept.
    c0 : coefficient shared by the three cosine terms (columns 0-2).
    c1 : coefficient shared by the two rational terms (columns 3-4).

    Returns
    -------
    Array of exp(eta), one value per row of X.
    """
    cos_terms = [c0 * np.cos(X[:, col]) for col in (0, 1, 2)]
    frac_terms = [c1 / (X[:, col] ** 2 + 1) for col in (3, 4)]

    # Accumulate strictly left to right so the floating-point result matches
    # a single chained sum of the terms in this order.
    eta = cos_terms[0]
    for term in cos_terms[1:] + frac_terms + [b0, v]:
        eta = eta + term

    return np.exp(eta)

In [None]:
def generate_data(
    data_type, dir_name,
    b0 = 0.2, c0 = 0.2, c1 = 0.2,
    p = 10, ar = [1, -0.5], ma = 1,
    n_simul = 100
):
    """Simulate clustered Poisson count data sets and write each one to CSV.

    For every ``repeat`` in ``range(n_simul)`` one data set is generated and
    saved as ``simul-data-<data_type>-<repeat>.csv`` inside ``dir_name``.

    Parameters
    ----------
    data_type : str
        Specification of the form ``'<n_sub>-<n_num>-<lam>-<rand_dist>'``,
        e.g. ``'1000-10-0.5-gamma'``.  ``n_sub`` (number of subjects) and
        ``n_num`` (rows per subject) are parsed as int, ``lam`` as float.
        ``rand_dist`` selects the random-effect distribution:

        * ``'fixed'``  - no random effect (u = 1, v = 0); ``lam`` is unused.
        * ``'gamma'``  - u ~ Gamma(shape=1/lam, scale=lam), v = log(u).
        * ``'normal'`` - v ~ Normal(0, sd=sqrt(lam)), u = exp(v).
    dir_name : str
        Directory the CSV files are written to.  It is joined with the file
        name via ``os.path.join``, so a trailing separator is optional.
    b0, c0, c1 : float
        Coefficients forwarded to ``mean_model``.
    p : int
        Number of covariate columns ``x0 .. x{p-1}`` to generate.
    ar, ma : array-like / scalar
        Lag-polynomial coefficients passed to ``ArmaProcess``.  ``ar`` is not
        modified here.
    n_simul : int
        Number of data sets to generate.

    Raises
    ------
    ValueError
        If ``rand_dist`` is not one of ``'fixed'``, ``'gamma'``, ``'normal'``
        (or if ``data_type`` cannot be split/parsed into its four fields).

    Notes
    -----
    The global NumPy RNG is re-seeded with ``repeat`` at the start of every
    iteration, so output is reproducible and the same seeds are used for
    every ``data_type``.
    """

    n_sub, n_num, lam, rand_dist = data_type.split('-')
    n_sub, n_num, lam = int(n_sub), int(n_num), float(lam)

    # Fail early with a clear message instead of hitting an undefined
    # u_rep / v_rep further down.
    if rand_dist not in ('fixed', 'gamma', 'normal'):
        raise ValueError(
            "Unknown rand_dist %r in data_type %r; "
            "expected 'fixed', 'gamma' or 'normal'." % (rand_dist, data_type)
        )

    N = n_sub * n_num
    # With the default ar=[1, -0.5], ma=1 this is an AR(1) process with
    # coefficient 0.5 (ArmaProcess takes the lag polynomial, sign included).
    arma = ArmaProcess(ar, ma)

    for repeat in range(n_simul):

        # Generate data

        np.random.seed(repeat)
        # N independent series of length p, laid out along axis 1: each row
        # of X is one covariate vector.
        X = arma.generate_sample(nsample=(N,p), axis=1)
        if rand_dist == 'fixed':
            u_rep, v_rep = np.repeat(1, N), np.repeat(0, N)
        elif rand_dist == 'gamma':
            u = np.random.gamma(1/lam, lam, n_sub)
            u_rep = np.repeat(u, n_num)
            v_rep = np.log(u_rep)
        else:  # 'normal' (validated above)
            v = np.random.normal(0, np.sqrt(lam), n_sub)
            u = np.exp(v)
            u_rep = np.repeat(u, n_num)
            v_rep = np.repeat(v, n_num)

        mu = mean_model(X, v_rep, b0, c0, c1)
        y = np.random.poisson(mu)

        data = pd.DataFrame(X, columns=[('x'+str(i)) for i in range(p)])
        data['y'] = y
        data['u'] = u_rep
        data['mu'] = mu
        # Subject index (each repeated n_num times) and within-subject index.
        data['sub'] = np.repeat(np.arange(n_sub), n_num)
        data['num'] = np.tile(np.arange(n_num), n_sub)

        # save data

        # os.path.join inserts the separator when dir_name has none, so the
        # file always lands inside dir_name.
        file_name = os.path.join(
            dir_name, 'simul-data-' + data_type + '-' + str(repeat)
        )
        data.to_csv(file_name+'.csv', index=False)

In [None]:
# Write the CSV files into the current working directory.
# Joining with '' appends a trailing path separator, so the output path is
# still inside this directory when a file name is concatenated onto it.
dir_name = os.path.join(os.getcwd(), '')

# Each entry is '<n_sub>-<n_num>-<lam>-<rand_dist>'.
data_type_list = [
    '1000-10-0-fixed',
    '1000-10-0.5-gamma',
    '1000-10-1-gamma',
    '1000-10-0.5-normal',
    '1000-10-1-normal',
]

for data_type in data_type_list:
    generate_data(
        data_type, dir_name,
        b0 = 0.2, c0 = 0.2, c1 = 0.2,
        p = 10, # only the first 5 covariates are used to generate mu
        ar = [1, -0.5], ma = 1,
        n_simul = 100)