In [1]:
%matplotlib inline

from scipy.stats import chi2, norm

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Task 1

The logic that overlapping confidence intervals imply equal parameters is incorrect because confidence intervals represent the range of plausible values for a parameter, not the true value. Overlapping intervals only indicate that the true values of the two parameters are not statistically significantly different from each other. The researcher could check if difference between this two parameters is zero and construct CI for that.

# Task 4

Sum of coefficients = 1 means that Coub_Douglas function exhibits CRS (because those are practically degrees of labour, capital and fuel in the production function). They also represent the share of the output that will will remunerate certain means of production.

In [2]:
data = pd.read_csv("Nerlove1963.csv").apply(np.log)
y = data['Cost'].values
x = data.drop(['Cost'], axis=1).values

In [3]:
X = sm.add_constant(x) #regressors
xtxi = np.linalg.inv(X.T @ X)
beta = xtxi @ X.T @ y #vector of estimated coefficients

yh = X @ beta #target values
eh = y - yh

n = X.shape[0]
Q = X.T @ X / n
Qi = np.linalg.inv(Q)

V = X.T @ np.diag(eh ** 2) @ X / n
V_beta = Qi @ V @ Qi

pd.DataFrame(V_beta, columns=['b1', 'b2', 'b3', 'b4', 'b5'], index=['b1', 'b2', 'b3', 'b4', 'b5'])

Unnamed: 0,b1,b2,b3,b4,b5
b1,413.502348,-1.922495,-20.358547,-76.922398,2.950307
b2,-1.922495,0.148764,-0.193792,0.211728,-0.033674
b3,-20.358547,-0.193792,8.447172,3.918115,-1.200389
b4,-76.922398,0.211728,3.918115,14.679601,-0.856086
b5,2.950307,-0.033674,-1.200389,-0.856086,0.79767


In [4]:
betas = ['b1', 'b2', 'b3', 'b4', 'b5']
for k in range (0, 5):
  print(betas[k] + ":", beta[k])

b1: -3.5265028449952864
b2: 0.720394075879717
b3: 0.4363412007898495
b4: -0.21988835075390706
b5: 0.4265169530626896


In [5]:
print("Standard Errors:")
for i, j in zip(np.diag(V_beta / n) ** 0.5, betas):
  print(j + ":", i)

Standard Errors:
b1: 1.6887096637876358
b2: 0.03203057017941685
b3: 0.24136354235688104
b4: 0.31818018431936834
b5: 0.0741698682070479


## t-test

In [6]:
R = np.array([[0, 0, 1, 1, 1]])
q = 1
t = np.sqrt(x.shape[0]) * (R @ beta - q) / np.sqrt(R @ V_beta @ R.T)
t = t[0, 0]
print ("t-statistic" + ':', t)
print("t-test p-value: ", norm.cdf(t))

t-statistic: -0.8176345924143603
t-test p-value:  0.20678293442072337


## Wald test

In [7]:
R = np.array([[0, 0, 1, 1, 1]])
q = 1
W = x.shape[0] * (R @ beta - q).T @ np.linalg.inv(R @ V_beta @ R.T) @ (R @ beta - q)
print ("Wald statistic" + ':', W)
print("Wald test p-value: ", chi2.sf(W, 1))

Wald statistic: 0.668526326712597
Wald test p-value:  0.4135658688414465
