In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
import statsmodels.api as sm

In [2]:
rng = np.random.default_rng(1)
x = rng.normal(size=100)
y = 2*x+rng.normal(size=100)

# 11(a)

In [4]:
# performing a simple regression of y onto x, without an intercept
result = sm.OLS(y, x).fit()
result.summary()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.743
Model:,OLS,Adj. R-squared (uncentered):,0.74
Method:,Least Squares,F-statistic:,285.6
Date:,"Mon, 09 Oct 2023",Prob (F-statistic):,6.23e-31
Time:,16:22:40,Log-Likelihood:,-141.35
No. Observations:,100,AIC:,284.7
Df Residuals:,99,BIC:,287.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,1.9762,0.117,16.898,0.000,1.744,2.208

0,1,2,3
Omnibus:,1.376,Durbin-Watson:,2.184
Prob(Omnibus):,0.503,Jarque-Bera (JB):,0.847
Skew:,0.121,Prob(JB):,0.655
Kurtosis:,3.381,Cond. No.,1.0


From above we can see that,
1) coefficient estimate beta1 is 1.9762.
2) The standard error of this coefficient estimate is 0.117
3) t-statistic is 16.898
4) p-value associated with null hypothesis is almost 0.

Due to standard distributed normal errors, coefficient is a bit understated from 2.
Therefore we can reject null hypothesis for this coefficient estimate.

# 11(b)

In [5]:
# Now without an intercept but x onto y
result1 = sm.OLS(x, y).fit()
result1.summary()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.743
Model:,OLS,Adj. R-squared (uncentered):,0.74
Method:,Least Squares,F-statistic:,285.6
Date:,"Mon, 09 Oct 2023",Prob (F-statistic):,6.23e-31
Time:,16:30:40,Log-Likelihood:,-58.349
No. Observations:,100,AIC:,118.7
Df Residuals:,99,BIC:,121.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.3757,0.022,16.898,0.000,0.332,0.420

0,1,2,3
Omnibus:,13.156,Durbin-Watson:,2.034
Prob(Omnibus):,0.001,Jarque-Bera (JB):,22.596
Skew:,-0.528,Prob(JB):,1.24e-05
Kurtosis:,5.075,Cond. No.,1.0


From above we can see that,
1) coefficient estimate beta is 0.3757.
2) The standard error of this coefficient estimate is 0.022.
3) t-statistic is 16.898
4) p-value associated with null hypothesis is almost 0.

Again the slope is a bit underestimated due to the standard distributed normal errors.
Therefore we can reject null hypothesis for this coefficient estimate.

# 11(c)

From above we can see that t-statistic value is the same and hence the p-value referrring to that coeff. estimate

# 11(d)

Algebraically verified

In [6]:
x, y

(array([ 3.45584192e-01,  8.21618144e-01,  3.30437076e-01, -1.30315723e+00,
         9.05355867e-01,  4.46374572e-01, -5.36953235e-01,  5.81118104e-01,
         3.64572396e-01,  2.94132497e-01,  2.84222413e-02,  5.46712987e-01,
        -7.36454087e-01, -1.62909948e-01, -4.82119313e-01,  5.98846213e-01,
         3.97221075e-02, -2.92456751e-01, -7.81908462e-01, -2.57192241e-01,
         8.14218052e-03, -2.75602905e-01,  1.29406381e+00,  1.00672432e+00,
        -2.71116248e+00, -1.88901325e+00, -1.74772092e-01, -4.22190412e-01,
         2.13642997e-01,  2.17321931e-01,  2.11783876e+00, -1.11202076e+00,
        -3.77605007e-01,  2.04277161e+00,  6.46702996e-01,  6.63063372e-01,
        -5.14006372e-01, -1.64807517e+00,  1.67464744e-01,  1.09014088e-01,
        -1.22735205e+00, -6.83226662e-01, -7.20436797e-02, -9.44751623e-01,
        -9.82699679e-02,  9.54830275e-02,  3.55862371e-02, -5.06291658e-01,
         5.93748072e-01,  8.91166954e-01,  3.20848305e-01, -8.18230227e-01,
         7.3

In [8]:
# confirming numerically
sum_xi_yi = np.dot(x, y)
sum_xi_2 = np.dot(x, x)
sum_yi_2 = np.dot(y, y)
sum_xiyi_2 = sum_xi_yi**2
t_statistic = (sum_xi_yi*(99**0.5))/(sum_xi_2*sum_yi_2-sum_xiyi_2)**0.5
t_statistic

16.8984170630351

Hence, verified.

# 11(e)

We see that when xi and yi are reversed the result stays the same, hence it is symmetric and that's why t-statistic for both regression is the same

# 11(f)

In [9]:
x1 = pd.DataFrame({"intercept":np.ones(100), "predictor1":x})

In [11]:
x1

Unnamed: 0,intercept,predictor1
0,1.0,0.345584
1,1.0,0.821618
2,1.0,0.330437
3,1.0,-1.303157
4,1.0,0.905356
...,...,...
95,1.0,-2.250854
96,1.0,-0.138655
97,1.0,0.033000
98,1.0,-1.425349


In [10]:
result2 = sm.OLS(y, x1).fit()
result2.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.741
Model:,OLS,Adj. R-squared:,0.738
Method:,Least Squares,F-statistic:,280.0
Date:,"Mon, 09 Oct 2023",Prob (F-statistic):,1.74e-30
Time:,18:47:25,Log-Likelihood:,-141.06
No. Observations:,100,AIC:,286.1
Df Residuals:,98,BIC:,291.3
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,-0.0760,0.101,-0.756,0.451,-0.276,0.124
predictor1,1.9686,0.118,16.734,0.000,1.735,2.202

0,1,2,3
Omnibus:,1.277,Durbin-Watson:,2.198
Prob(Omnibus):,0.528,Jarque-Bera (JB):,0.759
Skew:,0.114,Prob(JB):,0.684
Kurtosis:,3.361,Cond. No.,1.2


In [13]:
y1 = pd.DataFrame({"intercept":np.ones(100), "predictor_y":y})
y1

Unnamed: 0,intercept,predictor_y
0,1.0,0.039887
1,1.0,2.505681
2,1.0,0.535282
3,1.0,-1.937161
4,1.0,3.029555
...,...,...
95,1.0,-6.119176
96,1.0,0.832327
97,1.0,0.234106
98,1.0,-2.302292


In [14]:
result2_vice_versa = sm.OLS(x, y1).fit()
result2_vice_versa.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.741
Model:,OLS,Adj. R-squared:,0.738
Method:,Least Squares,F-statistic:,280.0
Date:,"Mon, 09 Oct 2023",Prob (F-statistic):,1.74e-30
Time:,18:49:04,Log-Likelihood:,-58.325
No. Observations:,100,AIC:,120.6
Df Residuals:,98,BIC:,125.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,0.0095,0.044,0.216,0.829,-0.078,0.097
predictor_y,0.3763,0.022,16.734,0.000,0.332,0.421

0,1,2,3
Omnibus:,13.123,Durbin-Watson:,2.035
Prob(Omnibus):,0.001,Jarque-Bera (JB):,22.501
Skew:,-0.528,Prob(JB):,1.3e-05
Kurtosis:,5.07,Cond. No.,1.98


### Hence verified that even after including intercept, t-statistic of Ho:B1=0 remains same for both regression of y onto x and regression of x onto y.