In [None]:
import numpy as np
import pandas as pd
import glob
import os
import xlrd
import openpyxl
import pickle
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.stats.diagnostic import acorr_ljungbox


In [79]:
# Read the pickled bundle from data_clean.pkl.
# NOTE: unpickling can execute arbitrary code, so only load files from a
# trusted source.
with open('data_clean.pkl', 'rb') as f:
    saved = pickle.load(f)

# The file handle is no longer needed once the object is in memory, so the
# individual entries are pulled out after the file has been closed.
price_df = saved['price_df']
return_df = saved['return_df']
category_map = saved['category_mapping']

In [80]:
# Check for autocorrelation (visual check, currently disabled)
#
# The snippet below is left commented out. When enabled it draws one
# partial-autocorrelation (PACF) plot per column of return_df, using lags up
# to 10 and the column name as the plot title.

# from statsmodels.graphics.tsaplots import plot_pacf
# for col in return_df.columns:
#     plot_pacf(return_df[col], lags=10)
#     plt.title(f"{col}")
#     plt.show()

In [None]:
# Ljung-Box test on every column of return_df.
# For each column the test is run with lags=10 and only the p-value in the
# last row of the result table (the largest lag) is kept, rounded to 4 decimals.

ljungbox_result = {}
for col in return_df.columns:
    lb_table = acorr_ljungbox(return_df[col], lags=10)
    ljungbox_result[col] = round(lb_table['lb_pvalue'].iloc[-1], 4)

# Order the (column, p-value) pairs from largest to smallest p-value, so the
# series with the weakest evidence of autocorrelation are listed first.
sorted_by_ljungbox = list(ljungbox_result.items())
sorted_by_ljungbox.sort(key=lambda pair: pair[1], reverse=True)
sorted_by_ljungbox

[('commodities', np.float64(0.7454)),
 ('bitcoin', np.float64(0.1517)),
 ('us_dollar', np.float64(0.0631)),
 ('inflation', np.float64(0.0372)),
 ('interest_rate', np.float64(0.0073)),
 ('us_equity', np.float64(0.0)),
 ('value', np.float64(0.0)),
 ('small_cap', np.float64(0.0)),
 ('low_vol', np.float64(0.0)),
 ('credit', np.float64(0.0)),
 ('long_vol', np.float64(0.0)),
 ('momentum', np.float64(0.0)),
 ('quality', np.float64(0.0)),
 ('trend', np.float64(0.0)),
 ('equity', np.float64(0.0)),
 ('em_equity', np.float64(0.0))]

Takeaway on autocorrelation
- all the equity and credit related assets show significant autocorrelation

Resolution:
- Applying a rolling window doesn't resolve autocorrelation; overlapping windows make the series even more autocorrelated.
- Applying differencing on top of the daily returns probably helps, but it also removes plenty of information.
- Aggregating daily returns to weekly will mitigate the autocorrelation for most of the markets while preserving information
- Randomly permute the sequence of returns to break the time ordering

In [82]:
# Aggregate return_df into weekly buckets and repeat the Ljung-Box test.
# NOTE(review): .mean() yields the average daily return within each week, not
# a cumulative weekly return -- confirm this is the intended aggregation.
weekly_returns = return_df.resample('W').mean()

# Per column: run the test with lags=2 and keep the p-value from the last row
# of the result table, rounded to 4 decimals.
ljungbox_result_wk = {}
for col in weekly_returns.columns:
    weekly_lb = acorr_ljungbox(weekly_returns[col], lags=2)
    ljungbox_result_wk[col] = round(weekly_lb['lb_pvalue'].iloc[-1], 4)

# Largest p-value first.
sorted_by_ljungbox_wk = list(ljungbox_result_wk.items())
sorted_by_ljungbox_wk.sort(key=lambda pair: pair[1], reverse=True)
sorted_by_ljungbox_wk

[('em_equity', np.float64(0.7956)),
 ('long_vol', np.float64(0.4152)),
 ('equity', np.float64(0.3179)),
 ('interest_rate', np.float64(0.2974)),
 ('trend', np.float64(0.2559)),
 ('commodities', np.float64(0.2296)),
 ('value', np.float64(0.1419)),
 ('inflation', np.float64(0.1357)),
 ('momentum', np.float64(0.0644)),
 ('us_equity', np.float64(0.0507)),
 ('bitcoin', np.float64(0.049)),
 ('quality', np.float64(0.0483)),
 ('us_dollar', np.float64(0.0442)),
 ('small_cap', np.float64(0.0414)),
 ('low_vol', np.float64(0.0053)),
 ('credit', np.float64(0.0002))]

Autocorrelation, cont'd

Six of the series still show significant autocorrelation (p < 0.05) after aggregating to weekly returns.

In [None]:
# Use permutation to break the autocorrelation
#
# The row order of return_df is shuffled with one shared permutation, so each
# shuffled row still holds the values that appeared together in the original
# row. A fixed seed makes the shuffle, and therefore the p-values below,
# reproducible on a fresh kernel; a local Generator is used so that the global
# numpy random state is left untouched.
PERMUTATION_SEED = 42
seq = np.random.default_rng(PERMUTATION_SEED).permutation(return_df.shape[0])

# Despite its name, `sorted_returns` holds the *shuffled* rows. reset_index()
# moves the original index into a regular column before the rows are reordered.
sorted_returns = return_df.reset_index().iloc[seq]

# Ljung-Box test (lags=10) on each shuffled column; keep the p-value from the
# last row of the result table, rounded to 4 decimals.
ljungbox_result_random = {}
for col in return_df.columns:
    ljungbox_result_random[col] = round(acorr_ljungbox(sorted_returns[col], lags=10).iloc[-1]['lb_pvalue'], 4)

# Largest p-value first.
sorted_by_ljungbox_random = sorted(ljungbox_result_random.items(), key=lambda item: item[1], reverse=True)
sorted_by_ljungbox_random


[('trend', np.float64(0.8451)),
 ('long_vol', np.float64(0.7903)),
 ('equity', np.float64(0.788)),
 ('credit', np.float64(0.7741)),
 ('value', np.float64(0.6698)),
 ('us_equity', np.float64(0.6549)),
 ('bitcoin', np.float64(0.642)),
 ('em_equity', np.float64(0.5823)),
 ('low_vol', np.float64(0.576)),
 ('interest_rate', np.float64(0.5113)),
 ('us_dollar', np.float64(0.5012)),
 ('quality', np.float64(0.4811)),
 ('inflation', np.float64(0.4496)),
 ('momentum', np.float64(0.4134)),
 ('commodities', np.float64(0.2703)),
 ('small_cap', np.float64(0.2171))]