In [1]:
# Import package for getting dataset example
import wooldridge as woo

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

import scipy.stats as stats

import math

  from pandas.core import (


# The t-test

![image](images/4_t-diagram.png)

![image](images/4_t-table1.png)

![image](images/4_t-table2.png)

### Confidence Interval

$$
\hat{\beta}_j \pm \text{t}_{crit} \text{se}({\hat{\beta}_j})
$$

where,
- $ \text{se}(\hat{\beta}_j) = \frac{\hat{\sigma}_{rg}}{\sqrt{n} \sqrt{1 - R_j^2} \, sd(x_j)} $
- $sd(x_j) = \sqrt{1/n \sum_{i=1}^{n} (x_{i, j} - \bar{x})^2}$

### Case Estimating t-value and p-value

![image](images/Example_4-1.png)

In [2]:
# Using summary model
wage1 = woo.dataWoo('wage1')

model = smf.ols(formula='np.log(wage) ~ educ + exper + tenure', data=wage1).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           np.log(wage)   R-squared:                       0.316
Model:                            OLS   Adj. R-squared:                  0.312
Method:                 Least Squares   F-statistic:                     80.39
Date:                Sat, 31 Aug 2024   Prob (F-statistic):           9.13e-43
Time:                        16:12:26   Log-Likelihood:                -313.55
No. Observations:                 526   AIC:                             635.1
Df Residuals:                     522   BIC:                             652.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.2844      0.104      2.729      0.0

In [70]:
# Calculating manual (This model assume it's two-tail hypothesis testing)

regressors = ['educ', 'exper', 'tenure']
target = 'wage'
n = int(wage1.shape[0])
df = n - len(regressors) - 1

params = {}
se_params = {}
t_values = {}
p_values = {}

# Extract X and y
X = pd.DataFrame(np.ones(n, dtype=int))
X = pd.concat([X, wage1[regressors]], axis=1)
y = wage1[target].apply(lambda x: np.log(x))

# Parameters estimates
X = np.array(X)
y = np.array(y).reshape(n, 1)
b = np.linalg.inv(X.T@X) @ X.T @ y

# Store params
for i in range(b.shape[0]):
    if i == 0:
        params['intercept'] = b[i, 0]
    else:
        params[f'{regressors[i - 1]}'] = b[i, 0]
print(f"Beta: \n{params}")

# Residuals, estiamted variance of residuals and SER
residuals = y - X @ b
var_residuals = (residuals.T @ residuals) / df
SER = np.sqrt(var_residuals)[0][0]

# Estimated variance of the parameters estiamtors and SE
var_beta = var_residuals * np.linalg.inv(X.T @ X)
std_error_beta = np.sqrt(np.diagonal(var_beta))

# Store std error beta
for i in range(len(std_error_beta)):
    if i == 0:
        se_params['intercept'] = std_error_beta[i]
    else:
        se_params[f'{regressors[i - 1]}'] = std_error_beta[i]
    
print(f"Std Error Params: \n", se_params)

# Regressors
for variable in ['intercept'] + regressors:
    t_value_temp = params[f'{variable}'] / se_params[f"{variable}"]
    t_values[f'{variable}'] = t_value_temp
    p_values[f'{variable}'] = 2 * (1 - stats.t.cdf(t_value_temp, df))
    
    
print("t-values:\n", t_values)
print("p-values:\n", p_values)

Beta: 
{'intercept': 0.2843595552360746, 'educ': 0.0920289867692827, 'exper': 0.004121109045609959, 'tenure': 0.022067217434724347}
Std Error Params: 
 {'intercept': 0.10419037797067718, 'educ': 0.0073299232744946695, 'exper': 0.0017232772008454382, 'tenure': 0.0030936491910178287}
t-values:
 {'intercept': 2.7292304795756026, 'educ': 12.555245576649948, 'exper': 2.3914371080799692, 'tenure': 7.133070387810876}
p-values:
 {'intercept': 0.006562462394036572, 'educ': 0.0, 'exper': 0.01713562312471195, 'tenure': 3.2944758032726895e-12}


### Case Confidence Interval

![image](images/Example_4-8.png)

In [83]:
rdchem = woo.dataWoo('rdchem')
regressors = ['sales', 'profmarg']
target = 'rd'
n = rdchem.shape[0]
df = n - len(rdchem) - 1
alpha = np.array([0.05, 0.01])

# Storage
params = {}
se_params = {}
CI_params = {}


# Calculate critical values
c_values = stats.t.ppf(1 - alpha, 522)

# Extract X and y
X = pd.DataFrame(np.ones(n, dtype=int))
X = pd.concat([X, rdchem[regressors]], axis=1)
X['sales'] = X['sales'].apply(lambda x: np.log(x))

y = rdchem[target].apply(lambda x: np.log(x))

# Parameters estimates
X = np.array(X)
y = np.array(y).reshape(-1, 1)
b = np.linalg.inv(X.T @ X) @ X.T @ y
# Store params
for i in range(b.shape[0]):
    if i == 0:
        params['intercept'] = b[i, 0]
    else:
        params[f'{regressors[i - 1]}'] = b[i, 0]

# Estimate std error parameters
residuals = y - X @ b
var_residuals = (residuals.T @ residuals) / df
var_beta = var_residuals * np.linalg.inv(X.T @ X)
std_error_beta = np.sqrt(np.diagonal(var_beta))

# Store std error beta
# for i in range(len(std_error_beta)):
#     if i == 0:
#         se_params['intercept'] = std_error_beta[i]
#     else:
#         se_params[f'{regressors[i - 1]}'] = std_error_beta[i]


# for c in c_values:

print(var_residuals)

[[-7.65050979]]


  std_error_beta = np.sqrt(np.diagonal(var_beta))


In [84]:
X

array([[ 1.        ,  8.42731229,  4.08953619],
       [ 1.        ,  7.94803199, 16.5017662 ],
       [ 1.        ,  6.39158203, 17.99597931],
       [ 1.        ,  4.89485031, -3.21856284],
       [ 1.        ,  3.73766962, 19.04761887],
       [ 1.        ,  5.96614674, 12.1282053 ],
       [ 1.        ,  4.5422304 ,  0.95846641],
       [ 1.        ,  6.81113427,  8.52516747],
       [ 1.        ,  9.89207265, 12.96212006],
       [ 1.        , 10.58933314, 10.46110439],
       [ 1.        ,  7.98497367,  3.19087338],
       [ 1.        ,  7.82955085, 14.13795757],
       [ 1.        ,  7.02536056,  4.02738237],
       [ 1.        ,  6.82611126,  0.44487846],
       [ 1.        ,  7.79671596,  5.44273567],
       [ 1.        ,  8.8178902 ,  4.87118721],
       [ 1.        ,  6.97195004, 27.18746948],
       [ 1.        ,  8.07087481,  5.10015965],
       [ 1.        ,  5.01063529, 21.73333168],
       [ 1.        ,  6.23382234, 16.44104385],
       [ 1.        ,  7.28117914, 18.668