# Wald Test

In [1]:
import tprstats
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy.stats as stats
import statsmodels.formula.api as smf
coffee_data = pd.read_excel('../data/Coffee_Data.xlsx', engine='openpyxl')
solar_data = pd.read_excel('../data/Solar_Data.xlsx', engine='openpyxl')

In [2]:
model_coffee_ts = tprstats.model(name = "ts", formula='cons~rpcarb+rpcinc+rpcofe', data=coffee_data)
model_coffee_ts.summary

<bound method _StatsmodelsModelWrapper.summary of <tprstats.models.TimeSeriesLinearModel object at 0x0000021241CCDBE0>>

The p-value for the wald test `hypothesis ='rpcofe=-0.013","rpcarb=0.08"` 
- using a model fitted with `cov_type='HAC'` should be p=0.3896
- Using a model fitted with `cov_type='H'` should be p=0.1826.

In [3]:
model_coffee_ts.wald_test('rpcofe=-0.013, rpcarb=0.08')

Wald Test Statistic:  1.1509055124120415
p-value:  0.32359114251321136


In [4]:
# trying use_correction=True
model_wald = smf.ols(formula='cons ~ rpcarb + rpcinc + rpcofe', data=coffee_data).fit(cov_type='HAC', cov_kwds={'maxlags': 1})
model_wald.wald_test('rpcofe=-0.013, rpcarb=0.08', use_f=True, scalar=True)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=1.1509055124120415, p=0.32359114251321136, df_denom=57, df_num=2>

In [5]:
model_wald.wald_test_terms(scalar=True)

<class 'statsmodels.stats.contrast.WaldTestResults'>
                 chi2        P>chi2  df constraint
Intercept    0.001150  9.729493e-01              1
rpcarb     167.442828  2.677334e-38              1
rpcinc      17.230706  3.310424e-05              1
rpcofe       6.614386  1.011582e-02              1

## Scipy

In [6]:
X = coffee_data[['rpcarb', 'rpcinc', 'rpcofe']]
X = sm.add_constant(X)
y = coffee_data['cons']
model_scipy = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 1})

In [7]:
null_values = np.array([-0.013, 0.08])
wald_statistic= np.sum((model_scipy.params[1:3] - null_values)**2) / np.diag(model_scipy.cov_params()[1:3])
p_value = stats.chi2.sf(wald_statistic, df=2)
print("Wald Test Statistic", wald_statistic)
print("p-value", p_value)

Wald Test Statistic [-1.12328708 63.59479699]
p-value [1.00000000e+00 1.55083393e-14]


## Binary Choice Wald Tests 


In [8]:
solar_logit=tprstats.model("logit",formula="Sale ~ AnnualSave + AnnualCost + DrivesHybrid + NearbySolar + HomeValue",data=solar_data)

Optimization terminated successfully.
         Current function value: 0.102926
         Iterations 9


In [10]:
solar_logit.wald_test('AnnualSave=4 , AnnualCost=-3')

Wald Test Statistic:  0.6714099320890368
p-value:  0.7148339750029004
