# Replication of Inoue (2008)

Some noteworthy points on the paper
- GMM should be more efficient than the FE estimator since it is a GLS version of the FE estimator
    - Results in lower RMSE
- He accounts for asymptotics for S,T stable and N -> inf (group size)
    - GMM and OLS both biased and consistent, but GMM more efficient
    - Therefore, as group size increases, GMM and OLS should converge to the same value

Problems with the implementation

- The estimator is implemented correctly
    - Yields the same results as verified OLS estimator when GMM weighting matrix equals identity
- However, GMM estimates have higher RMSE 
    - So, somehow the proposed optimal weighting matrix is actually suboptimal (compared to equal weighting), leading to higher RMSE. 
- Moreover, the RMSE does not decrease as $\bar{N}$ increases
    - Possibly an error in the averaging process? Or the specification of the variances?




#### Classes that I use to simulate and estimate the data

In [3]:
import numpy as np

# Implementation of the simulation study of Inoue (2008), slow
class GMM_replication():

    def simulate(self, S, T, N_bar, var_ds, var_xst, var_zi, var_vzst):
        '''
        Gamma = individual regressor coefficient
        Beta = group regressor coefficient
        S = groups
        T = time periods
        N_bar = average group size
        var_ds = variance of group FE
        var_xst = variance of group regressor
        var_zi = variance of individual regressor + individual error
        var_vzst = variance of individual regressor
        '''
        
        # Convert to SD
        var_ds = np.sqrt(var_ds)
        var_xst = np.sqrt(var_xst)
        var_zi = np.sqrt(var_zi)
        var_vzst = np.sqrt(var_vzst)
        
        # Determining group sizes
        ## Pi matrix
        pis = np.random.uniform(0.3, 0.6, size = S*T)
        Pi = np.diag(pis / pis.sum())

        group_sizes = np.ceil(np.diag(Pi) * N_bar * S * T).astype(int)

        # group_sizes = (np.ones(S*T)*100).astype(int)

        N = int(group_sizes.sum())

        
        var_ai_ei = 1 - var_ds # individual FE + individual error
        var_ezi = var_zi - var_vzst

        # Generate group FE
        d_s = np.random.normal(0, var_ds, S)

        # Generate regressors x, v, iid normal
        x_st = np.random.normal(0, var_xst, S*T)
        v_zst = np.random.normal(0, var_vzst, S)
        
        # Simulate y_i = S*T
        ## Since beta, gamma = 0, the only variation in y_i comes from a_i, d_s, e_i

        y_bar_s = []
        z_bar_st = []

        # z_i = []

        for s in range(S):
            for t in range(T):
                # y_bar
                d = d_s[s] # FE for group s
                n = group_sizes[s*t]
                ai_ei = np.random.normal(0, var_ai_ei, n)
                y_bar_s.append(np.mean(ai_ei)+d)
                
                # z_i
                v = v_zst[s]
                z = np.random.normal(0, var_ezi, n)
                # z_i.append(z)
                z_bar_st.append(np.mean(z) + v)
                
        self.X = np.vstack((x_st, np.array(z_bar_st))).T
        self.Y = y_bar_s
        self.group_sizes = group_sizes


    def estimate(self, X, Y, S, T, group_size_vec):
        M = np.kron(np.eye(S), demean(T))
        Ts = np.arange(T-1, S*T, T) # Indices of last time-period, to use in T*N vectors
        
        X1 = M @ X
        X2 = np.delete(X1, Ts, axis = 0)
        
        group_size_mat = np.diag(group_size_vec)
        Omega = M@group_size_mat@M
        Omega = np.delete(Omega, Ts, axis = 0)
        Omega = np.delete(Omega, Ts, axis = 1)
        
        Omega_inv = np.linalg.inv(Omega)
        
        Y1 = M @ Y
        Y2 = np.delete(Y1, Ts)
        
        coefs = np.linalg.inv(X2.T @ Omega_inv @ X2) @ X2.T @ Omega_inv @ Y2 

        return coefs.tolist()
        
            
    def simulate_estimate(self, S, T, N_bar, var_ds, var_xst, var_zi, var_vzst, reps):
        OLS_coefs, GMM_coefs = [], []
        
        for _ in range(reps):
        
            self.simulate(S, T, N_bar, var_ds, var_xst, var_zi, var_vzst)

            # GMM estimator reduces to OLS with identity weighting matrix
            OLS_estimate = self.estimate(self.X, self.Y, S, T, np.ones(S*T))
            OLS_coefs.append(OLS_estimate)
            # Use optimal weighting matrix
            group_sizes_normalized = self.group_sizes / np.sum(self.group_sizes)
            GMM_estimate = self.estimate(self.X, self.Y, S, T, group_sizes_normalized)
            GMM_coefs.append(GMM_estimate)
            
            
        return [np.array(OLS_coefs), np.array(GMM_coefs)]
    
    

def demean(T):
    M_T = np.eye(T) - (1/T) * np.ones((T, T))
    return M_T
        


#### Sample simulation/estimation

In [4]:
# Regressors 
## X_st = iid normal
gamma, beta = 0, 0
# Groups
S, T = 8, 8

# Pi matrix
pis = np.random.uniform(size = S*T)
Pi = np.diag(pis / pis.sum())

N_bar = 256
group_sizes = np.ceil(np.diag(Pi) * N_bar * S * T).astype(int)


# Variances
var_ds = 0.5 # group FE

var_xst = 1 # group regressor variance
var_zi = 1 # individual regressor + individual error variance
var_vzst = 0.5 # individual regressor variance



res = GMM_replication().simulate_estimate(S, T, N_bar, var_ds, var_xst, var_zi, var_vzst, 500)
# res[0] = estimated coefs for OLS, res[1] = estimated coefs for GMM


OLS_MAE = abs(res[0]).mean()
GMM_MAE = abs(res[1]).mean()

OLS_RMSE = np.sqrt(np.square(res[0]).mean())
GMM_RMSE = np.sqrt(np.square(res[1]).mean())



print(f"OLS RMSE: {OLS_RMSE}\nGMM_RMSE: {GMM_RMSE}\nRatio: {OLS_RMSE/GMM_RMSE}")


  np.sqrt(res[0]).mean()


nan

In [10]:
OLS_RMSE = np.sqrt(np.square(res[0]).mean())
GMM_RMSE = np.sqrt(np.square(res[1]).mean())



print(f"OLS RMSE: {OLS_RMSE}\nGMM_RMSE: {GMM_RMSE}\nRatio: {OLS_RMSE/GMM_RMSE}")

OLS RMSE: 0.09914368723841958
GMM_RMSE: 0.09914368723841958
Ratio: 1.0
