# Residuals Analysis

In [1]:
import numpy as np
import pandas as pd
from statsmodels import regression
import statsmodels.api as sm
import statsmodels.stats.diagnostic as smd
import scipy.stats as stats
import matplotlib.pyplot as plt
import math

import yfinance as yf
import datetime as dt

from jupyterthemes import jtplot
jtplot.style(theme='onedork', figsize=(15, 9))

In [61]:
class ResidualAnalysis():
    def __init__(self, asset, benchmark, start, end):
        self.asset = asset
        self.benchmark = benchmark
        self.start = start
        self.end = end
        
        self.get_data()
    
    def get_data(self):
        asset = yf.download(self.asset, start=self.start, end=self.end)
        benchmark = yf.download(self.benchmark, start=self.start, end=self.end)

        self.asset_returns = np.log(asset['Close'].div(asset['Close'].shift(1))).dropna().values
        self.benchmark_returns = np.log(benchmark['Close'].div(benchmark['Close'].shift(1))).dropna().values
        
        self.linear_regression_model()
    
    def linear_regression_model(self):
        benchmark_returns = sm.add_constant(self.benchmark_returns)        
        self.model = sm.OLS(self.asset_returns, benchmark_returns).fit()

        benchmark_returns = benchmark_returns[:, 1]
        B0, B1 = self.model.params
    
    def heteroscedasticity_test(self):
        model = self.model
        bp_test = smd.het_breuschpagan(model.resid, model.model.exog)

        print(f'Lagrange Multiplier Statistic: {bp_test[0]}')
        print(f'P-value: {bp_test[1]}')
        print(f'f-value: {bp_test[2]}')
        print(f'f_p-value: {bp_test[3]}\n')
        if bp_test[1] > 0.05:
            print('The relationship is not heteroscedastic.')
        if bp_test[1] < 0.05:
            print('The relationship is heteroscedastic.')
    
    def autocorrelation_test(self):
        ljung_box = smd.acorr_ljungbox(self.asset_returns, auto_lag=True, return_df=True)
        
        print(f'P-Values: {ljung_box[1]}\n')
        if any(ljung_box[1] < 0.05):
            print('The residuals are autocorrelated.')
        else:
            print('The residuals are not autocorrelated.')
    
    
    

In [58]:
test = ResidualAnalysis('BRK-B', 'SPY', dt.datetime(2020, 1, 1), dt.datetime(2022, 1, 1))

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [64]:
test.heteroscedasticity_test()

<statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x0DF1C430>
Lagrange Multiplier Statistic: 2.4960158149071887
P-value: 0.11413471264994834
f-value: 2.4984845157699653
f_p-value: 0.11458574471115253

The relationship is not heteroscedastic.


In [63]:
test.autocorrelation_test()

P-Values: [4.88748060e-10 3.85505098e-13 1.18962016e-12 2.15269042e-12
 3.19816589e-13 4.58693925e-16 7.49079617e-25 4.73287786e-29
 1.80144112e-30 6.24623156e-30 2.60332219e-29 9.52396942e-29
 6.80795458e-30 6.46055690e-31 3.91996077e-32]

The residuals are autocorrelated.
