# Regression Tests

In [19]:
import tprstats
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy.stats as stats
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
# Load the Excel workbooks used by the regression tests in this notebook.
# Paths are relative to the notebook's working directory.
coffee_data = pd.read_excel(
    '../data/Coffee_Data.xlsx',
    engine='openpyxl',
)
diamonds_211 = pd.read_excel(
    '../data/Diamonds_211.xlsx',
    engine='openpyxl',
)
solar_data = pd.read_excel(
    '../data/Solar_Data.xlsx',
    engine='openpyxl',
)
solar_new_prospects = pd.read_excel(
    '../data/Solar_New_Prospects.xlsx',
    engine='openpyxl',
)

## Wald Tests
### Linear Models
#### Cross-section aka coefTestH

In [20]:
# Fit the "cs" model of Price on Carat, with Clarity wrapped in C(...) so the
# formula treats it as a categorical regressor.
model_diamonds = tprstats.model(
    "cs",
    'Price ~ Carat + C(Clarity)',
    diamonds_211,
)
# H0: the coefficients on Clarity levels 5 and 6 are equal.
model_diamonds.wald_test('C(Clarity)[T.5] = C(Clarity)[T.6]')

Wald Test Statistic:  1.3686960207841596
p-value:  0.2434010493556541


#### Time Series Wald Test aka coefTestHAC

In [21]:
# Fit the "ts" model of cons on rpcarb, rpcinc and rpcofe, then display its
# summary table as the cell output.
model_coffee_ts = tprstats.model(
    name="ts",
    formula='cons~rpcarb+rpcinc+rpcofe',
    data=coffee_data,
)
model_coffee_ts.summary()

0,1,2,3
Dep. Variable:,cons,R-squared:,0.956
Model:,OLS,Adj. R-squared:,0.953
No. Observations:,61,F-statistic:,287.1
Covariance Type:,HAC,Prob (F-statistic):,2.33e-34

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0576,1.698,0.034,0.973,-3.342,3.457
rpcarb,0.0901,0.007,12.940,0.000,0.076,0.104
rpcinc,0.1235,0.030,4.151,0.000,0.064,0.183
rpcofe,-0.0126,0.005,-2.572,0.013,-0.022,-0.003


In [22]:
# Joint H0 on the "ts" coffee model: rpcofe = -0.013 and rpcarb = 0.08.
# NOTE(review): presumably uses the HAC covariance reported in the model
# summary -- confirm against the tprstats implementation.
model_coffee_ts.wald_test('rpcofe=-0.013, rpcarb=0.08')

Wald Test Statistic:  1.1509055124121625
p-value:  0.3235911425131734


### Binary Choice Wald Tests 
#### Logit Wald Test

In [23]:
# Fit the "logit" model of Sale on the five household predictors in solar_data.
solar_logit = tprstats.model(
    "logit",
    formula="Sale ~ AnnualSave + AnnualCost + DrivesHybrid + NearbySolar + HomeValue",
    data=solar_data,
)

Optimization terminated successfully.
         Current function value: 0.102926
         Iterations 9


In [24]:
# Joint H0 on the logit coefficients: AnnualSave = 4 and AnnualCost = -3.
solar_logit.wald_test('AnnualSave=4 , AnnualCost=-3')

Wald Test Statistic:  0.6714099320890222
p-value:  0.7148339750029056


#### Probit Wald Test

In [25]:
# Fit the "probit" model with the same formula as the logit fit, then print
# the plain-text summary.
solar_probit = tprstats.model(
    "probit",
    formula="Sale ~ AnnualSave + AnnualCost + DrivesHybrid + NearbySolar + HomeValue",
    data=solar_data,
)
print(solar_probit.summary())

Optimization terminated successfully.
         Current function value: 0.100579
         Iterations 9
                          Probit Regression Results                           
Dep. Variable:                   Sale   No. Observations:                 1500
Model:                         Probit   Df Residuals:                     1494
Method:                           MLE   Df Model:                            5
Date:                Fri, 13 Dec 2024   Pseudo R-squ.:                  0.3010
Time:                        17:24:34   Log-Likelihood:                -150.87
converged:                       True   LL-Null:                       -215.84
Covariance Type:            nonrobust   LLR p-value:                 2.450e-26
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept       -6.5365      2.307     -2.833      0.005     -11.058      -2.015
AnnualSave       1.9807

In [26]:
# Joint H0 on the probit coefficients: AnnualSave = 2.3 and AnnualCost = -1.9.
solar_probit.wald_test('AnnualSave=2.3 , AnnualCost=-1.9')

Wald Test Statistic:  1.511682643482767
p-value:  0.46961535087703543


## Ramsey RESET
The Ramsey RESET test checks the functional form of a linear model.

In [29]:
# Run the Ramsey RESET test on the "ts" coffee model and print the result,
# which displays as a table with one p-value per power (2 and 3).
print(model_coffee_ts.ramsey_test())


   power    pvalue
0      2  0.182315
1      3  0.372575
