# David Alderman
## Econometrics
## Assignment #4
### Problem #2

In [1]:
import pandas as pd
import numpy as np

In [2]:
# One wage column per year: 'y1' ... 'y11'.
col_headers = [f'y{year}' for year in range(1, 12)]

# NOTE(review): absolute, machine-specific location -- edit DATA_PATH to run elsewhere.
DATA_PATH = r'C:\Users\david\OneDrive\Documents\1a. Columbia MSFE\2020.09_Intro_to_Econometrics\psets\Pset 5\card_wage_2008.txt'

# The separator is a regex, so it is written as a raw string; a plain '\s+'
# is an invalid escape sequence and triggers a warning on recent Python versions.
df = pd.read_csv(DATA_PATH, sep=r'\s+', names=col_headers)
num_years = len(df.columns)

# Part A

### Discard individuals whose wages change year-over-year by more than a factor of 10 (in either direction)

In [3]:
max_factor = 10  # largest year-over-year wage ratio (up or down) that is kept

# Year-over-year wage ratios, one column per consecutive pair of years
# (e.g. 'y2/y1' = year 2 wages / year 1 wages).
factor_df = pd.DataFrame({
    f'y{i + 2}/y{i + 1}': df[f'y{i + 2}'] / df[f'y{i + 1}']
    for i in range(num_years - 1)
})

# Take both row extremes from the ratio columns only, *before* the summary
# columns are appended, so neither summary can feed into the other.
row_max = factor_df.max(axis=1)
row_min = factor_df.min(axis=1)
factor_df['Row Max'] = row_max
factor_df['Row Min'] = row_min

max_index = factor_df.loc[row_max > max_factor]      # increased by more than the factor
min_index = factor_df.loc[row_min < 1 / max_factor]  # decreased by more than the factor

# Individuals violating either bound, each listed once.
discard_mask = (row_max > max_factor) | (row_min < 1 / max_factor)
min_max = factor_df.loc[discard_mask]

# Remove every flagged individual in a single call instead of row by row.
df = df.drop(index=min_max.index)
print('Number of individuals post-discard:')
print(len(df))

Number of individuals post-discard:
1302


# Part B

The five moment conditions are defined as follows ($\psi_{14}$ is two-dimensional, so it contributes two moments):

$
\psi_{13} = Y_{i1}(Y_{i3} - Y_{i2} - \theta (Y_{i2} - Y_{i1}))
$

$
\psi_{14} = \begin{bmatrix}Y_{i1}\\Y_{i2}\end{bmatrix}(Y_{i4} - Y_{i3} - \theta(Y_{i3} - Y_{i2}))
$

$
\psi_{23} = (Y_{i2} - Y_{i1}) * (Y_{i3} - \theta Y_{i2})
$

$
\psi_{24} = (Y_{i3} - Y_{i2}) * (Y_{i4} - \theta Y_{i3})
$

# Part C

Derivative of each moment defined as:

$
\frac{d\psi_{13}}{d \theta} = - Y_{i1} (Y_{i2} - Y_{i1})
$

$
\frac{d\psi_{14}}{d \theta} = - \begin{bmatrix}Y_{i1}\\Y_{i2}\end{bmatrix}(Y_{i3} - Y_{i2})
$

$
\frac{d\psi_{23}}{d \theta} = - Y_{i2} (Y_{i2} - Y_{i1})
$

$
\frac{d\psi_{24}}{d \theta} = - Y_{i3} (Y_{i3} - Y_{i2})
$

# Part D

In [4]:
theta = 0.3  # value of theta at which the moments are evaluated


def moments(theta):
    """Evaluate the five moment functions for every row of ``df``.

    Reads the module-level ``df`` and uses its first four columns, by
    position, as Y_i1 ... Y_i4.

    Parameters
    ----------
    theta : scalar or length-1 array
        Parameter value at which the moments are evaluated.

    Returns
    -------
    np.matrix
        One row per moment, in the order psi_13, psi_14 (Y_i1 instrument),
        psi_14 (Y_i2 instrument), psi_23, psi_24; one column per row of ``df``.
    """
    y1, y2, y3, y4 = (df.iloc[:, k] for k in range(4))

    # Consecutive-year differences shared by several moments.
    d21 = y2 - y1
    d32 = y3 - y2
    d43 = y4 - y3

    psi_13 = y1 * (d32 - theta * d21)
    psi_14_a = y1 * (d43 - theta * d32)
    psi_14_b = y2 * (d43 - theta * d32)
    psi_23 = d21 * (y3 - theta * y2)
    psi_24 = d32 * (y4 - theta * y3)
    return np.matrix([psi_13, psi_14_a, psi_14_b, psi_23, psi_24])

# Sample average of each moment (mean across individuals) at the chosen theta.
M = moments(theta).mean(axis=1)

In [5]:
# Row labels for the five moments, in the same order as the rows of M.
moment_list = ['psi_13', 'psi_14_0', 'psi_14_1', 'psi_23', 'psi_24']
M_df = pd.DataFrame(M, index=moment_list, columns=['@theta=0.3'])

print('Average of each Moment:')
print(M_df)

Average of each Moment:
          @theta=0.3
psi_13      0.409843
psi_14_0    0.666327
psi_14_1    0.747435
psi_23      0.379638
psi_24      0.729407


# Part E

Evaluate objective function at theta = 0.3

In [6]:
I = np.identity(5)  # identity weight matrix, one row/column per moment


def obj_f(theta):
    """Return the identity-weighted GMM objective at ``theta``.

    Averages the moments from ``moments(theta)`` across individuals and
    returns the quadratic form (mean moments)' I (mean moments) as a scalar.
    """
    g_bar = moments(theta).mean(axis=1)
    quad_form = g_bar.T.dot(I).dot(g_bar)
    return np.sum(quad_form)
# Objective value at the chosen theta.
q_at_theta = obj_f(theta)
print(q_at_theta)

1.8467823652952895


# Part F

Evaluate the derivative of the objective function at theta = 0.3

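$\frac{dQ}{d\theta} = 2\,(M_1\,dM_1 + M_2\,dM_2 + M_3\,dM_3 + M_4\,dM_4 + M_5\,dM_5)$, where each $M_k$ and $dM_k$ is the sample average of moment $k$ and of its derivative with respect to $\theta$.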

In [7]:
# Per-individual derivative of each moment with respect to theta
# (theta itself does not appear in any of them).
wage_1, wage_2, wage_3 = (df.iloc[:, k] for k in range(3))
gap_21 = wage_2 - wage_1
gap_32 = wage_3 - wage_2

dM1 = -wage_1 * gap_21
dM2 = -wage_1 * gap_32
dM3 = -wage_2 * gap_32
dM4 = -wage_2 * gap_21
dM5 = -wage_3 * gap_32

# Average each derivative across individuals -> 5 x 1 column.
dM = np.matrix([dM1, dM2, dM3, dM4, dM5]).mean(axis=1)

# Average moments at the same theta.
M = moments(theta).mean(axis=1)

# Derivative of the identity-weighted objective: 2 * sum_k M_k * dM_k.
dQ = 2 * M.T.dot(dM)
print(dQ.sum())

-2.740291419731106


# Part G

Minimize the objective function and report the estimate

In [8]:
from scipy.optimize import minimize

# First-step GMM: minimise the identity-weighted objective, starting from theta.
model = minimize(obj_f, x0=theta)

# model.x is a length-1 array holding the estimate.
theta_hat = model.x
print(theta_hat)

[0.62720509]


# Part H

Using the estimated value of $\theta$, estimate the optimal weight matrix.

In [9]:
# Optimal weight matrix: C = inverse of Delta, where
# Delta = (1/N) * M * M' is the (moments x moments) matrix built from the
# per-individual moments evaluated at theta_hat.
n = len(df)
moment_vector = moments(theta_hat)
outer_sum = moment_vector.dot(moment_vector.T)
delta_hat = (1 / n) * outer_sum
c_hat = np.linalg.inv(delta_hat)

c_df = pd.DataFrame(c_hat, index=moment_list, columns=moment_list)
c_df  # optimal weight matrix

Unnamed: 0,psi_13,psi_14_0,psi_14_1,psi_23,psi_24
psi_13,0.020683,-0.02077,0.03247,0.050609,-0.012973
psi_14_0,-0.02077,0.073261,-0.087136,-0.091566,0.015943
psi_14_1,0.03247,-0.087136,0.117475,0.116842,-0.019979
psi_23,0.050609,-0.091566,0.116842,0.17916,-0.038865
psi_24,-0.012973,0.015943,-0.019979,-0.038865,0.023023


# Part I

Calculate the two-step GMM estimate by minimizing the objective function
with the estimated weight matrix, and estimate the standard error.

#### New GMM Estimate (Theta hat)

In [10]:
def obj_f_c(theta):
    """Return the GMM objective at ``theta`` weighted by ``c_hat``.

    Sums the moments from ``moments(theta)`` across individuals, divides by
    the module-level ``n``, and returns the quadratic form
    (mean moments)' c_hat (mean moments) as a scalar.
    """
    g_bar = moments(theta).sum(axis=1) / n
    weighted = g_bar.T.dot(c_hat)
    return np.sum(weighted.dot(g_bar))

# Second step: minimise the c_hat-weighted objective, starting from the
# first-step estimate.
new_model = minimize(obj_f_c, x0=theta_hat)

# Unwrap the length-1 solution array into a plain Python float.
new_theta_hat = new_model.x.item()
print(new_theta_hat)

0.9432548846098653


#### Standard Error Estimate

$
\sqrt{N}\left(\hat{\theta}-\theta_{0}\right) \stackrel{d}{\rightarrow} N\left(0,\left(\Gamma^{\prime} C_{0} \Gamma\right)^{-1}\left(\Gamma^{\prime} C_{0} \Delta C_{0} \Gamma\right)\left(\Gamma^{\prime} C_{0} \Gamma\right)^{-1}\right)
$

In [11]:
# Re-estimate Delta at the two-step estimate. c_hat is not recomputed in this
# cell, so the weight matrix is still the one built in Part H.
new_moment_vector = moments(new_theta_hat)
delta_hat = (1 / n) * new_moment_vector.dot(new_moment_vector.T)

# Sandwich variance (G'CG)^-1 (G'C Delta C G) (G'CG)^-1 with the averaged
# moment derivatives dM in the role of Gamma.
gamma_c = dM.T.dot(c_hat)
part_1 = np.linalg.inv(gamma_c.dot(dM))
part_2 = gamma_c.dot(delta_hat).dot(c_hat).dot(dM)

# Divide by n to move from the asymptotic variance to the variance of theta hat.
var = (1 / n) * (part_1 * part_2 * part_1)
std_err = var.item() ** 0.5
print(std_err)

0.12208960498299896
