# PART 1: OLS and WLS

In [142]:
import numpy as np
import statsmodels.api as sm
from statsmodels.nonparametric.smoothers_lowess import lowess
from skmisc.loess import loess


In [143]:
#Employee Absence Data

#Vector 
Y = np.array([1,0,1,4,3,2,5,6,9,13,15,16])

# (2,12) Matrix 
X = np.array([[1,1],
          [2,1],
          [2,2],
          [3,2],
          [5,4],
          [5,6],
          [6,5],
          [7,4],
          [10,8],
          [11,7],
          [11,9],
          [12,10]])

In [146]:
#OLS Regression
model_ols = sm.OLS(Y, sm.add_constant(X)).fit()
residuals = model_ols.resid
print(model_ols.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.904
Model:                            OLS   Adj. R-squared:                  0.882
Method:                 Least Squares   F-statistic:                     42.25
Date:                Sun, 10 Mar 2024   Prob (F-statistic):           2.66e-05
Time:                        21:53:00   Log-Likelihood:                -23.294
No. Observations:                  12   AIC:                             52.59
Df Residuals:                       9   BIC:                             54.04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.2630      1.096     -2.065      0.0



In [224]:
# WLS using LOESS weights and smoothing
def calculate_loess_weights(X):
    # Create a LOESS object with specified parameters
    l = loess(X, Y, span=0.90, degree=2)
    # Fit the LOESS model
    l.fit()
    # Return the fitted values (weights)
    return l.outputs.fitted_values

# Calculate LOESS weights using X as predictor
weights = 1 / abs(np.log(calculate_loess_weights(X)**2))

print(weights)

[0.27113528 2.15052654 0.32017455 0.39572367 0.42862009 0.59226372
 0.26122321 0.32017455 0.20617489 0.1969437  0.19573041 0.17841788]


In [220]:
# WLS Regression
loess_model_wls = sm.WLS(Y, sm.add_constant(X), weights=weights).fit()

print(loess_model_wls.summary())


                            WLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.909
Model:                            WLS   Adj. R-squared:                  0.888
Method:                 Least Squares   F-statistic:                     44.73
Date:                Sun, 10 Mar 2024   Prob (F-statistic):           2.11e-05
Time:                        22:27:07   Log-Likelihood:                -22.565
No. Observations:                  12   AIC:                             51.13
Df Residuals:                       9   BIC:                             52.58
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.6427      0.774     -3.416      0.0



In [207]:
#Lowess smoothing residuals
log_sq_residuals = np.log(residuals**2)
smoothed = lowess(log_sq_residuals, Y, frac= 0.9, it=2, is_sorted= True)


#WLS Regression with weights from lowess smoothed residuals
weights = 1 / np.exp(smoothed[:, 1])
lowess_model_wls = sm.WLS(Y, sm.add_constant(X), weights = weights).fit()

print(lowess_model_wls.summary())


                            WLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.887
Model:                            WLS   Adj. R-squared:                  0.862
Method:                 Least Squares   F-statistic:                     35.22
Date:                Sun, 10 Mar 2024   Prob (F-statistic):           5.54e-05
Time:                        22:14:36   Log-Likelihood:                -22.760
No. Observations:                  12   AIC:                             51.52
Df Residuals:                       9   BIC:                             52.98
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.7817      0.869     -2.051      0.0



In [221]:
#I) Which of these two have the smallest standard errors?
std_err_OLS = model_ols.bse
lowess_std_err_WLS = lowess_model_wls.bse
loess_std_err_WLS = loess_model_wls.bse
print("It can be concluded that the Loess WLS has a smaller standard Error\n",
      f"Standard Error OLS: {std_err_OLS}\n",
      f"Lowess Standard Error WLS: {lowess_std_err_WLS}\n",
      f"Loess Standard Error WLS: {loess_std_err_WLS}")


It can be concluded that the Loess WLS has a smaller standard Error
 Standard Error OLS: [1.09594145 0.4805157  0.60635705]
 Lowess Standard Error WLS: [0.86859375 0.45374034 0.55894293]
 Loess Standard Error WLS: [0.77369087 0.42599256 0.4937408 ]


In [222]:
#II) Which of these two have the largest adjusted R-squareds?
Adj_R_squared_OLS = model_ols.rsquared_adj
lowess_Adj_R_squared_WLS = lowess_model_wls.rsquared_adj
loess_Adj_R_squared_WLS = loess_model_wls.rsquared_adj
print("It can be concluded that the Loess WLS has the largest adjusted R-squareds.\n",
      f"Adj R squared OLS: {Adj_R_squared_OLS}\n",
      f"Lowess Adj R squared WLS: {lowess_Adj_R_squared_WLS}\n",
      f"Loess Adj R squared WLS: {loess_Adj_R_squared_WLS}")


It can be concluded that the Loess WLS has the largest adjusted R-squareds.
 Adj R squared OLS: 0.8823462031307651
 Lowess Adj R squared WLS: 0.8615475993224067
 Loess Adj R squared WLS: 0.8882685380126907


In [223]:
#III) Which of these have the smallest observed p-values?
p_values_OLS = model_ols.pvalues
lowess_p_values_WLS = lowess_model_wls.pvalues
loess_p_values_WLS = loess_model_wls.pvalues
print("It can be concluded that the Loess WLS has the smallest observed p-values\n",
      f"P Values OLS: {p_values_OLS}\n",
      f"Lowess P Values WLS: {lowess_p_values_WLS}\n",
      f"Loess P Values WLS: {loess_p_values_WLS}")

It can be concluded that the Loess WLS has the smallest observed p-values
 P Values OLS: [0.06892819 0.01040338 0.70319426]
 Lowess P Values WLS: [0.07046538 0.00551641 0.38934776]
 Loess P Values WLS: [0.00767955 0.00293146 0.37880568]


#### IV) What can you conclude about WLS solution in comparison to the OLS Solution?

It can be concluded that the WLS solution has a lower standard error, implying that the residuals
from the WLS are closer to the regression line than the OLS. Both the OLS and WLS models have 
relatively high r-squared values indicating a correlation between the independent and dependant 
variables. The WLS solution has decreased heteroscedasticity compared to the OLS solution. 

When comparing loess WLS and the lowess WLS, the loess WLS has lower p-values, higher r-squared values, 
and lower standard error. Overall, the weighted adjustments used in the WLS solution result in less variance.


# PART 2: Covariance Matrix

In [217]:
#Calculating covariance matrix using numpy
covariance_matrix = np.cov(X.T)
print(f" Covariance Matrix:\n {covariance_matrix}")

 Covariance Matrix:
 [[15.47727273 11.65909091]
 [11.65909091  9.71969697]]
