## Solution code for 7.54 with Python

In [2]:
import pandas as pd
import numpy as np

# Load the data set from disk
df = pd.read_csv('data.csv')

# Split the frame into the predictor matrix X and the response column y
X = df[['X1', 'X2', 'X3']].values
y = df[['y1']].values

# Prepend a column of ones to X so the model includes an intercept term
X = np.hstack((np.ones((X.shape[0], 1)), X))

#### (a) Find $\hat\beta$ and $s^2$
- $\hat\beta = (332.111,\;-1.546,\;-1.425,\;-2.237)^T$
- $s^2 = 5.345$

In [3]:
# Calculate the OLS estimator for beta.
# lstsq minimises ||X b - y|| directly, which avoids forming and explicitly
# inverting X^T X (the explicit inverse loses accuracy when X is
# ill-conditioned).
beta = np.linalg.lstsq(X, y, rcond=None)[0]

# Fitted values and the unbiased variance estimate s^2 = SSE / (n - k),
# where k is the number of columns of X (intercept included).
y_hat = X @ beta
y_mse = np.sum((y_hat - y)**2) / (len(y) - X.shape[1])

# print the estimated coefficients (beta values)
print(beta.round(3))
# print s^2
print(y_mse.round(3))

[[332.111]
 [ -1.546]
 [ -1.425]
 [ -2.237]]
5.345


#### (b) Find an estimate of $cov(\hat\beta)$
Using
$$
\widehat{\mathrm{cov}}(\hat\beta) = \hat\sigma^2(\mathbf{X}^T\mathbf{X})^{-1} = s^2(\mathbf{X}^T\mathbf{X})^{-1}
$$

In [4]:
# Invert the Gram matrix X^T X
XtX_inv = np.linalg.inv(np.matmul(X.T, X))

# Scale the inverse by s^2 to get the estimated covariance matrix of beta-hat
cov_beta = XtX_inv * y_mse

print(cov_beta.round(3))

[[ 3.49426e+02 -1.81100e+00 -1.67000e+00 -1.09000e-01]
 [-1.81100e+00  1.00000e-02  7.00000e-03 -2.00000e-03]
 [-1.67000e+00  7.00000e-03  2.20000e-02 -9.00000e-03]
 [-1.09000e-01 -2.00000e-03 -9.00000e-03  1.15000e-01]]


#### (c) Find $R^2$ and $R^2_a$
For a model with $p$ covariates (not counting the constant term), the adjusted $R^2$ is
$$
R^2_a = 1-(1-R^2)\frac{n-1}{n-p-1}
$$

$$
R^2 = 0.955 \\ R^2_a =0.946
$$

In [5]:
# Find the R^2 value: the share of the total variation in y that the
# fitted values account for (SSR / SST).
y_bar = np.mean(y)
SST = np.sum((y - y_bar)**2)      # total sum of squares
SSR = np.sum((y_hat - y_bar)**2)  # regression sum of squares
R2 = SSR / SST
print(R2.round(3))

# Find the adjusted R^2 value
n = len(y)
# Number of covariates, excluding the intercept column of X (3 here).
# Derived from the design matrix instead of being hard-coded.
p = X.shape[1] - 1
adj_R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1)
print(adj_R2.round(3))

0.955
0.946


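#### (d) In order to find the maximum yield for $y_1$, a second-order model is of interest. Find $\hat\beta$ and $s^2$ for the model
$$
y_1 = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \beta_3 x_3 + \beta_4 x_1^2 + \beta_5 x_2^2 + \beta_6 x_3^2 + \beta_7 x_1 x_2 + \beta_8 x_1 x_3 + \beta_9 x_2 x_3 + \varepsilon
$$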

In [6]:
# Second order polynomial model: append X1^2, X2^2, X3^2, X1*X2, X1*X3, X2*X3
# to the first-order design matrix (column 0 of X is the intercept, columns
# 1-3 are X1, X2, X3).
X2 = np.column_stack((X, X[:, 1]**2, X[:, 2]**2, X[:, 3]**2,
                      X[:, 1]*X[:, 2], X[:, 1]*X[:, 3], X[:, 2]*X[:, 3]))

# Calculate the OLS estimator for beta.
# The columns of X2 mix levels, squares and cross-products, so X2^T X2 can be
# badly conditioned; lstsq solves the least-squares problem without
# explicitly inverting it.
beta2 = np.linalg.lstsq(X2, y, rcond=None)[0]

# Fitted values and the unbiased variance estimate s^2 = SSE / (n - k),
# where k is the number of columns of X2 (intercept included).
y_hat2 = X2 @ beta2
y_mse2 = np.sum((y_hat2 - y)**2) / (len(y) - X2.shape[1])

# print the estimated coefficients (beta values)
print(beta2.round(3))

# print s^2
print(y_mse2.round(3))

[[ 9.64929e+02]
 [-7.44200e+00]
 [-1.15080e+01]
 [-2.14000e+00]
 [ 1.20000e-02]
 [ 3.30000e-02]
 [-2.94000e-01]
 [ 5.40000e-02]
 [ 3.80000e-02]
 [-1.02000e-01]]
5.134


#### (e) Find the $R^2$ and $R^2_a$ for the second-order model
Result
$$
R^2 = 0.974 \\
R^2_a = 0.948
$$

In [7]:
# Calculate the R^2 value of the second-order fit (SSR / SST)
y_bar = np.mean(y)
SST = np.sum((y - y_bar)**2)       # total sum of squares
SSR = np.sum((y_hat2 - y_bar)**2)  # regression sum of squares
R2 = SSR / SST
print(R2.round(3))

# Find the adjusted R^2 value
n = len(y)
# Number of covariates, excluding the intercept column of X2 (9 here).
# Derived from the design matrix instead of being hard-coded.
p = X2.shape[1] - 1
adj_R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1)
print(adj_R2.round(3))

0.974
0.948
