# Regression-based test

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Download the marketing dataset and append an all-ones `constant` column,
# which the later cells select explicitly as the intercept term.
file_url = (
    'https://raw.githubusercontent.com/huangyh09/foundation-data-science/'
    'main/w8-hypothesis-testing/marketing.csv'
)
df = pd.read_csv(file_url).assign(constant=1)

In [None]:
# Preview the first few rows to check the loaded columns (including `constant`).
df.head()

In [None]:
# Scatter plot of the `sales` column against the `newspaper` column,
# overlaid with seaborn's fitted linear regression line.
sns.regplot(x='newspaper', y='sales', data=df)

## Wald test (t test on coefficient)

In [None]:
# Ordinary least squares: sales ~ constant + newspaper.
# Response vector and the two design matrices used in this notebook.
Y = df.loc[:, 'sales']
X0 = df.loc[:, ['constant']]               # intercept only
X1 = df.loc[:, ['constant', 'newspaper']]  # intercept + newspaper

# Keep both the model object and its fitted results around for later cells.
mod1 = sm.OLS(endog=Y, exog=X1)
res1 = mod1.fit()

In [None]:
# Print the regression table for the fit on `constant` and `newspaper`.
print(res1.summary())

In [None]:
# Two-tailed p-value for the Wald (t) test on a regression coefficient.
# NOTE(review): both constants are presumably read off the res1 summary
# table; confirm they still match if the data or model changes.
t_stat = 3.3   # test statistic
dof = 198      # degree of freedom

# Use the survival function on |t| instead of (1 - cdf(t)) * 2: the result
# stays within [0, 1] even when the statistic is negative, and it avoids the
# cancellation error of subtracting a cdf that is very close to 1.
wald_p_value = 2 * stats.t.sf(abs(t_stat), df=dof)
wald_p_value

## Likelihood ratio test

In [None]:
# Null model for the likelihood ratio test: sales regressed on the
# intercept column alone.
Y = df.loc[:, 'sales']
X0 = df.loc[:, ['constant']]

# Keep both the model object and its fitted results around for later cells.
mod0 = sm.OLS(endog=Y, exog=X0)
res0 = mod0.fit()

In [None]:
# Print the regression table for the intercept-only fit.
print(res0.summary())

In [None]:
# Likelihood ratio statistic: twice the gain in log-likelihood when moving
# from the intercept-only fit (res0) to the fit that adds newspaper (res1).

_lambda = 2 * (res1.llf - res0.llf)
_lambda

In [None]:
# p value (one-tailed): upper-tail area of the chi-square distribution with
# 1 degree of freedom beyond the likelihood ratio statistic.
# The survival function is used instead of 1 - cdf because the subtraction
# rounds to exactly 0 once the cdf is within machine precision of 1, which
# happens for large statistics; sf keeps the small tail probability.

stats.chi2.sf(_lambda, df=1)

## Condition on other features

In [None]:
# Extend the regression with the `facebook` column alongside `newspaper`,
# keeping the explicit intercept column.
Y = df.loc[:, 'sales']
X2 = df.loc[:, ['constant', 'newspaper', 'facebook']]

# Keep both the model object and its fitted results around for later cells.
mod2 = sm.OLS(endog=Y, exog=X2)
res2 = mod2.fit()

In [None]:
# Print the regression table for the model fitted in the previous cell
# (constant + newspaper + facebook). The original printed res1 here, which
# only repeated the newspaper-only table and never showed the res2 fit.
print(res2.summary())