In [1]:
import numpy as np
from gfe_estimation import estimate_grouped_fixed_effect_model_parameters

# Simulate the outcome variable

$y_{it} = x_{it}'\theta + \alpha_{g_i t} + \epsilon_{it}$

## Simulation Strategy

For independent individuals $i = 1,...,N$ with $N = 100$, each belonging to one of $g = 1,...,G$ distinct groups with $G = 2$, observed over time periods $t = 1,...,T$ with $T = 2$:

- Simulate k=2 independent covariates $x_{i1k} \sim Unif(0,30)$, $x_{i2k} \sim Unif(15,50)$ (Did I introduce a time trend that I should not have?)
- Simulate $\epsilon_{it} \sim \mathcal{N}(0, \sigma^2)$
- alpha = [40, 60, 80, 90]
- theta = [0.2, 0.8]

In [6]:
#make it flexible
def simulate():
    """Simulate one fixed panel data set from the linear grouped-effects model.

    The draw is fully deterministic (seed 1). Unlike calling
    ``np.random.seed`` it uses a private legacy generator, so NumPy's global
    random state is left untouched while the produced numbers stay the same.

    Layout: 100 individuals observed over 2 periods, stacked individual by
    individual (rows ``2*i`` and ``2*i + 1`` are periods 1 and 2 of
    individual ``i``), with 2 covariates per observation.

    Returns
    -------
    Y : np.matrix
        Outcomes ``alpha + X @ theta + epsilon``. Because ``X`` is an
        ``np.matrix`` the result is a matrix as well
        (NOTE(review): this is a 1 x 200 row, not a flat vector - confirm the
        estimator expects that).
    X : np.matrix
        Covariates with shape (200, 2); column ``k`` holds covariate ``k``.
    """
    # RandomState(1) yields the same stream as np.random.seed(1) followed by
    # np.random.* calls, but does not clobber the global generator.
    rng = np.random.RandomState(1)

    alpha = np.array([40, 60, 80, 90])
    theta = np.array([0.2, 0.8])

    # Draw order is part of the reproducible output: both covariates for
    # period 1 (Unif(0, 30)) first, then both for period 2 (Unif(15, 50)).
    x_i11 = rng.uniform(low=0, high=30, size=100)
    x_i12 = rng.uniform(low=0, high=30, size=100)
    x_i21 = rng.uniform(low=15, high=50, size=100)
    x_i22 = rng.uniform(low=15, high=50, size=100)

    # Interleave the two periods per individual: (i=0,t=1), (i=0,t=2), (i=1,t=1), ...
    x_it1 = np.transpose(np.asmatrix([x_i11, x_i21])).flatten()
    x_it2 = np.transpose(np.asmatrix([x_i12, x_i22])).flatten()
    X = np.vstack((x_it1, x_it2)).T

    epsilon = rng.randn(200)

    # Row r of `dummy` selects alpha[r % 4]: even-indexed individuals get
    # (40, 60) over the two periods, odd-indexed individuals get (80, 90).
    dummy = np.tile(np.identity(4), 50).T
    Y = dummy @ alpha + X @ theta + epsilon
    return Y, X

In [23]:
# NOTE(review): `s` is not used in this cell; presumably the planned number of
# replications - confirm or remove.
s = 1000

# Unpack a single call so Y and X are guaranteed to come from the same draw
# (previously simulate() was run twice, once per return value).
Y, X = simulate()

# The starting values passed here equal the alpha/theta hard-coded in simulate().
estimates = estimate_grouped_fixed_effect_model_parameters(
    outcome=Y,
    groups=2,
    periods=2,
    individuals=100,
    alpha_0=np.array([40, 60, 80, 90]),
    theta_0=np.array([0.2, 0.8]),
    X=X,
    k=2,
)

### writing a flexible simulation and replication function:
- simulate from different distributions (check whether it could work)
- different number of features, groups, observations, periods(?)
- with different alpha and theta parameters
- seed(?)

### replication:
- How do I store the replicated simulation data?
- np.repeat may not be the way to go.
- How do I make sure stored X, Y and estimates come from the same simulation?

In [None]:
def simulate(
    seed,
    nobs,
    nfeatures,
    ngroups,
    nperiods,
    theta,
    alpha,
    #...
):
    """Simulate data with unobserved grouped dynamic heterogeneity.

    Simulate outcomes y (length *nobs*), features X (*nobs* x *nfeatures*) and
    grouped fixed effects alpha by imposing a linear model.

    NOTE(review): only the signature and this docstring are visible here; the
    parameter meanings are presumed from their names (``seed`` for the random
    generator, ``ngroups``/``nperiods`` for the panel dimensions, ``theta`` and
    ``alpha`` for the model parameters) - confirm once the body is written.

    """