## Solution code for 8.38

In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Load the data set used throughout this exercise.
df = pd.read_csv('rent.csv')

# Response vector.
y = df['y']

# Design matrix: the three predictors plus the intercept column `const`
# that statsmodels needs in order to estimate beta_0.
X = sm.add_constant(df[['X1', 'X2', 'X3']])

#### (a) Test the overall regression hypothesis $H_0 : \beta_1=\beta_2=\beta_3=0$
Reject $H_0$ : p-val $<0.05$

In [36]:
# Fit the linear model by ordinary least squares.
model = sm.OLS(y, X)
results = model.fit()

# Overall F-test of H0: beta_1 = beta_2 = beta_3 = 0.
f_stat = results.fvalue
# True means the p-value is below 0.05, i.e. H0 is rejected at the 5% level.
reject_h0 = results.f_pvalue < 0.05

print(f_stat)
print(reject_h0)

65.03660360494278
True


#### (b) Test $H_0 : \beta_j = 0 \;\;\text{for}\;\;j=1,2,3$
- t test : reject $H_0$ for $\beta_1, \beta_2$
- Bonferroni approach (each test at level $0.05/3$): reject $H_0$ for $\beta_1, \beta_2$

In [11]:
# p-values of the individual t tests, one per coefficient (intercept included)
p_values = results.pvalues

# Element-wise flag: is each p-value below the unadjusted level 0.05?
print(p_values < 0.05)
# Element-wise flag: is each p-value below the Bonferroni level 0.05/3?
print(p_values < 0.05/3)

const    False
X1        True
X2        True
X3       False
dtype: bool
const    False
X1        True
X2        True
X3       False
dtype: bool


#### (c) Find confidence intervals for $\beta_1,\beta_2,\beta_3$ using both the ordinary (unadjusted) method and the Bonferroni method

In [12]:
# Unadjusted 95% confidence intervals (alpha = 0.05) for every coefficient;
# column 0 is the lower bound and column 1 the upper bound.
print(results.conf_int(alpha=0.05))

               0          1
const  -9.209292  10.534824
X1      0.614812   0.945730
X2      0.247544   0.758696
X3    -44.865645  10.665152


In [34]:
# Bonferroni confidence intervals for beta_1, beta_2, beta_3.
# With three simultaneous intervals, each one uses the upper-tail probability
# 0.05 / (2 * 3) = 0.00833.
# Critical value: t 0.00833 quantile with dof 30, read from a t-table.
# NOTE(review): dof 30 is hard-coded from the table lookup — confirm it
# matches results.df_resid if the data set changes.
t = 2.5357

# standard error for each beta
se = results.bse

# confidence interval = beta +- t * se
# Coefficients are selected by position with .iloc (0 is the intercept):
# plain integer keys on a label-indexed Series rely on a deprecated
# positional fallback in pandas.
for j in (1, 2, 3):
    beta_j = results.params.iloc[j]
    se_j = se.iloc[j]
    print(f'CI for beta_{j}: ', beta_j - t * se_j, beta_j + t * se_j)

CI for beta_1:  0.5748355807891872 0.9857063765544931
CI for beta_2:  0.18579526441438415 0.8204445460690801
CI for beta_3:  -51.57396238054848 17.373469499409378


#### (d) Find a 95% confidence interval for $E(y_0) = \mathbf{x^T_0\beta}$

In [33]:
# 95% confidence interval for the mean response E[y_0] at x_0 = [1,15,30,0.5]
x_0 = [1,15,30,0.5]

# point estimate x_0' beta_hat
Ey_0 = np.dot(x_0, results.params)

# two-tailed 5% critical value of the t distribution with dof 30 (t-table)
t = 2.042

# standard error of the estimated mean: sqrt(x_0' Cov(beta_hat) x_0)
cov_beta = results.cov_params()
quad_form = np.dot(np.dot(x_0, cov_beta), x_0)
se = np.sqrt(quad_form)

# interval = Ey_0 +- t * se
margin = t * se
print('upper bound for E[y_0]: ', Ey_0 + margin)
print('lower bound for E[y_0]: ', Ey_0 - margin)

upper bound for E[y_0]:  25.677130203356864
lower bound for E[y_0]:  12.143478925003503


#### (e) Find a 95% prediction interval for $y_0 = \mathbf{x^T_0\beta} + \epsilon$

In [30]:
# 95% prediction interval for a new observation y_0 at x_0 = [1,15,30,0.5]
x_0 = [1,15,30,0.5]

# PredictionResults.conf_int takes (obs, alpha) in that order, so a bare
# positional 0.05 lands in `obs`, not `alpha`. Both are passed by keyword:
# obs=True requests the interval for a new observation (not the mean
# response), and alpha=0.05 sets the 95% level explicitly.
result_pred = results.get_prediction(x_0).conf_int(obs=True, alpha=0.05)

# one prediction row, laid out as [lower, upper]
lower, upper = result_pred[0]
print('upper bound for y_0: ', upper)
print('lower bound for y_0: ', lower)

upper bound for y_0:  37.059925394016474
lower bound for y_0:  0.7606837343438961
