# Production function estimation

In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

### Data

In [2]:
data = pd.read_csv('Data20190514.csv', header=None)
data.columns = ['firm ID', 'year', 'logY', 'logL', 'logK', 'logM', 'invest', 'exit']
data.head(10)

Unnamed: 0,firm ID,year,logY,logL,logK,logM,invest,exit
0,1,0,14.833649,4.912655,14.476757,14.756739,-9.0,0
1,1,1,15.183484,5.09375,14.325154,13.311428,21.090775,0
2,1,2,14.071942,4.905275,14.557183,13.113793,69.504303,0
3,1,3,14.655201,4.672829,14.232835,13.069642,60.508053,0
4,1,4,13.756456,4.744932,14.26612,12.881343,25.486347,0
5,1,5,14.348527,4.61512,14.028557,13.121171,44.889374,0
6,1,6,14.498944,4.624973,14.201223,13.066875,28.312925,0
7,2,0,12.799171,3.465736,11.654267,12.423535,-9.0,0
8,2,1,12.332752,3.555348,11.662062,12.14505,7.962585,0
9,2,2,12.668156,3.7612,11.795184,12.050364,3.983317,0


In [3]:
num_rows = len(data['firm ID'])
num_rows

13832

## OLS

In [4]:
# OLS
x = np.stack([np.ones(num_rows),
                          np.asarray(data.loc[:, 'logK'].values),
                          np.asarray(data.loc[:, 'logL'].values)], axis=1)
y = np.asarray(data.loc[:, 'logY'].values)
res = np.linalg.solve(np.dot(x.T, x), np.dot(x.T, y))
res

array([5.56238117, 0.38576301, 0.74349085])

## Fixed Effect

In [5]:
# Fixed Effect
data.loc[:, 'logY t-1'] = data['logY'].shift()
data.loc[:, 'logK t-1'] = data['logK'].shift()
data.loc[:, 'logL t-1'] = data['logL'].shift()
data_fx = data[data['year'] != 0]
x = np.stack([data_fx.loc[:, 'logK'].values - data_fx.loc[:, 'logK t-1'].values,
                        data_fx.loc[:, 'logL'].values - data_fx.loc[:, 'logL t-1'].values], axis=1)
y = data_fx.loc[:, 'logY'].values - data_fx.loc[:, 'logY t-1'].values
res = np.linalg.solve(np.dot(x.T, x), np.dot(x.T, y))
res

array([0.03983651, 0.241837  ])

## Olley and Pakes(1996)

### Step 1

In [6]:
# Olley and Pakes approarch
data.loc[:, 'i square'] = data.loc[:, 'invest'].values**2
data.loc[:, 'k square'] = data.loc[:, 'logK'].values**2
data.loc[:, 'i quad'] = data.loc[:, 'invest'].values**3
data.loc[:, 'k quad'] = data.loc[:, 'logK'].values**3
data.loc[:, 'i k'] = data.loc[:, 'invest'].values * data.loc[:, 'logK'].values
data.loc[:, 'i2 k'] = data.loc[:, 'invest'].values**2 * data.loc[:, 'logK'].values
data.loc[:, 'i k2'] = data.loc[:, 'invest'].values * data.loc[:, 'logK'].values**2

In [7]:
data = data[data['year'] != 0]
num_rows = len(data['year'])
x = np.stack([np.ones(num_rows),
                         data.loc[:, 'logL'].values,
                         data.loc[:, 'invest'].values,
                         data.loc[:, 'logK'].values,
                         data.loc[:, 'i square'].values,
                         data.loc[:, 'k square'].values,
                         data.loc[:, 'i k'].values,
                         data.loc[:, 'i quad'].values,
                         data.loc[:, 'k quad'].values,
                         data.loc[:, 'i2 k'].values,
                         data.loc[:, 'i k2'].values], axis=1)
y = data.loc[:, 'logY'].values
res = np.linalg.solve(np.dot(x.T, x), np.dot(x.T, y))
res

array([ 2.31582008e+01,  6.56627485e-01,  6.93829304e-02, -4.11695433e+00,
       -1.68131304e-07,  3.75930971e-01, -9.37864655e-03,  1.35408548e-11,
       -1.00441233e-02,  4.04384887e-08,  3.10640367e-04])

In [8]:
# Coefficient of Labor
coef_l = res[1]

### Step 2

In [9]:
data.loc[:, 'y_hat'] = np.dot(x, res)
data.loc[:, 'LHS'] = data.loc[:, 'logY'].values - data.loc[:, 'logL'].values*coef_l
data.loc[:, 'phi'] = data.loc[:, 'y_hat'].values - data.loc[:, 'logL'].values*coef_l
data.loc[:, 'phi t-1'] = data.loc[:, 'phi'].shift()

In [10]:
data_op = data[data['year'] != 1]

In [11]:
LHS = data_op.loc[:, 'LHS'].values
logK = data_op.loc[:, 'logK'].values
phi_t1 = data_op.loc[:, 'phi t-1'].values
logK_t1 = data_op.loc[:, 'logK t-1'].values
n = len(LHS)

In [12]:
def SSR(beta):
    total = 0
    for i in range(n):
        total += (LHS[i] - beta[0] - beta[1]*logK[i] \
                      - beta[2]*(phi_t1[i] - beta[0] - beta[1]*logK_t1[i]) \
                      - beta[3]*(phi_t1[i] - beta[0] - beta[1]*logK_t1[i])**2)**2
    return total

In [13]:
%%time
beta = np.ones(4)
res = minimize(SSR, beta, method='BFGS')

CPU times: user 25.2 s, sys: 107 ms, total: 25.3 s
Wall time: 25.5 s


In [14]:
res

      fun: 4178.102528602588
 hess_inv: array([[ 3.13963915e-02, -2.39725869e-03, -2.39135045e-03,
        -1.07384495e-03],
       [-2.39725869e-03,  3.05097238e-04, -1.00836882e-04,
         2.52373203e-05],
       [-2.39135045e-03, -1.00836882e-04,  1.33551319e-03,
         7.67456255e-04],
       [-1.07384495e-03,  2.52373203e-05,  7.67456255e-04,
         1.22505620e-03]])
      jac: array([ 0.00073242,  0.00091553,  0.00250244, -0.00445557])
  message: 'Desired error not necessarily achieved due to precision loss.'
     nfev: 725
      nit: 45
     njev: 119
   status: 2
  success: False
        x: array([ 6.11439535,  0.40474144,  0.84183422, -0.26901726])

$\beta_k=0.40474144$